Merge branch 'master' into ingestmetadata-query

This commit is contained in:
nishantmonu51 2015-01-15 18:00:31 +05:30
commit c7452b75f6
180 changed files with 11432 additions and 1469 deletions

7
.travis.yml Normal file
View File

@ -0,0 +1,7 @@
language: java
jdk:
- oraclejdk7
- oraclejdk8
sudo: false

View File

@ -60,6 +60,7 @@ public class Execs
/**
* @param nameFormat nameformat for threadFactory
* @param capacity maximum capacity after which the executorService will block on accepting new tasks
*
* @return ExecutorService which blocks accepting new tasks when the capacity reached
*/
public static ExecutorService newBlockingSingleThreaded(final String nameFormat, final int capacity)

View File

@ -0,0 +1,44 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline;
import io.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;
import java.util.List;
public interface TimelineLookup<VersionType, ObjectType>
{
/**
* Does a lookup for the objects representing the given time interval. Will *only* return
* PartitionHolders that are complete.
*
* @param interval interval to find objects for
*
* @return Holders representing the interval that the objects exist for, PartitionHolders
* are guaranteed to be complete
*/
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> lookup(Interval interval);
public PartitionHolder<ObjectType> findEntry(Interval interval, VersionType version);
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import io.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;
public class UnionTimeLineLookup<VersionType, ObjectType> implements TimelineLookup<VersionType, ObjectType>
{
Iterable<TimelineLookup<VersionType, ObjectType>> delegates;
public UnionTimeLineLookup(Iterable<TimelineLookup<VersionType, ObjectType>> delegates)
{
this.delegates = delegates;
}
@Override
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> lookup(final Interval interval)
{
return Iterables.concat(
Iterables.transform(
delegates,
new Function<TimelineLookup<VersionType, ObjectType>, Iterable<TimelineObjectHolder<VersionType, ObjectType>>>()
{
@Override
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> apply(TimelineLookup<VersionType, ObjectType> input)
{
return input.lookup(interval);
}
}
)
);
}
public PartitionHolder<ObjectType> findEntry(Interval interval, VersionType version)
{
for (TimelineLookup<VersionType, ObjectType> delegate : delegates) {
final PartitionHolder<ObjectType> entry = delegate.findEntry(interval, version);
if (entry != null) {
return entry;
}
}
return null;
}
}

View File

@ -58,7 +58,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
* to achieve "atomic" updates. First add new items, then check if those items caused anything to be overshadowed, if
* so, remove the overshadowed elements and you have effectively updated your data set without any user impact.
*/
public class VersionedIntervalTimeline<VersionType, ObjectType>
public class VersionedIntervalTimeline<VersionType, ObjectType> implements TimelineLookup<VersionType, ObjectType>
{
private static final Logger log = new Logger(VersionedIntervalTimeline.class);

View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Druid - a distributed column store.
~ Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
~
~ This program is free software; you can redistribute it and/or
~ modify it under the terms of the GNU General Public License
~ as published by the Free Software Foundation; either version 2
~ of the License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU General Public License for more details.
~
~ You should have received a copy of the GNU General Public License
~ along with this program; if not, write to the Free Software
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<appender name="ConsoleAppender" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{ISO8601} %p [%t] %c - %m%n"/>
</layout>
</appender>
<root>
<priority value ="info" />
<appender-ref ref="ConsoleAppender"/>
</root>
</log4j:configuration>

View File

@ -10,7 +10,7 @@
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/druid-io/druid/wiki">Documentation</a></li>
</ul>
</div>
</div>

View File

@ -88,10 +88,27 @@ The spec\_file is a path to a file that contains JSON and an example looks like:
},
"tuningConfig" : {
"type" : "hadoop",
"workingPath": "/tmp",
"partitionsSpec" : {
"type" : "dimension",
"partitionDimension" : null,
"targetPartitionSize" : 5000000,
"jobProperties": {
"mapreduce.job.queuename": "default"
}
"maxPartitionSize" : 7500000,
"assumeGrouped" : false,
"numShards" : -1
},
"shardSpecs" : { },
"leaveIntermediate" : false,
"cleanupOnFailure" : true,
"overwriteFiles" : false,
"ignoreInvalidRows" : false,
"jobProperties" : { },
"combineText" : false,
"persistInHeap" : false,
"ingestOffheap" : false,
"bufferSize" : 134217728,
"aggregationBufferRatio" : 0.5,
"rowFlushBoundary" : 300000
}
}
```

View File

@ -25,7 +25,7 @@ Clone the code from [https://github.com/druid-io/whirr](https://github.com/druid
git checkout trunk
mvn clean install -Dmaven.test.failure.ignore=true
In order to run the test below, you'll also need two files that available only from a [standard install of Druid](http://druid.io/downloads.html) or the [Druid repo](https://github.com/metamx/druid/tree/master/examples/bin/examples):
In order to run the test below, you'll also need two files that available only from a [standard install of Druid](http://druid.io/downloads.html) or the [Druid repo](https://github.com/druid-io/druid/tree/master/examples/bin/examples):
* `druid/examples/bin/examples/wikipedia/wikipedia_realtime.spec`
* `druid/examples/bin/examples/indexing/wikipedia_realtime_task.json`

View File

@ -8,34 +8,93 @@ For general Broker Node information, see [here](Broker.html).
Runtime Configuration
---------------------
The broker module uses several of the default modules in [Configuration](Configuration.html) and has the following set of configurations as well:
The broker node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Query Configs
#### Query Prioritization
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.broker.balancer.type`|`random`, `connectionCount`|Determines how the broker balances connections to historical nodes. `random` choose randomly, `connectionCount` picks the node with the fewest number of active connections to|`random`|
|`druid.broker.select.tier`|`highestPriority`, `lowestPriority`, `custom`|If segments are cross-replicated across tiers in a cluster, you can tell the broker to prefer to select segments in a tier with a certain priority.|`highestPriority`|
|`druid.broker.select.tier.custom.priorities`|`An array of integer priorities.`|Select servers in tiers with a custom priority list.|None|
|`druid.broker.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.broker.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
|`druid.broker.cache.numBackgroundThreads`|Non-negative integer|Number of background threads in the thread pool to use for eventual-consistency caching results if caching is used. It is recommended to set this value greater or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Setups who use a Druid backend in programatic settings (sub-second re-querying) should consider setting this to 0 to prevent eventual consistency from biting overall performance in the ass. If this is you, please experiment to find out what setting works best. |`0`|
#### Concurrent Requests
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
|`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time nodes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|5|
|`druid.broker.http.readTimeout`|The timeout for data reads.|PT15M|
#### Processing
The broker only uses processing configs for nested groupBy queries.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
#### General Query Configuration
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.chunkPeriod`|Long-interval queries (of any type) may be broken into shorter interval queries, reducing the impact on resources. Use ISO 8601 periods. For example, if this property is set to `P1M` (one month), then a query covering a year would be broken into 12 smaller queries. |0 (off)|
##### GroupBy Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
##### Search Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
### Caching
You can optionally only configure caching to be enabled on the broker by setting caching configs here.
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.broker.cache.useCache`|Enable the cache on the broker.|false|
|`druid.broker.cache.populateCache`|Populate the cache on the broker.|false|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.broker.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.broker.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
|`druid.broker.cache.numBackgroundThreads`|Number of background threads in the thread pool to use for eventual-consistency caching results if caching is used. It is recommended to set this value greater or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Setups who use a Druid backend in programatic settings (sub-second re-querying) should consider setting this to 0 to prevent eventual consistency from biting overall performance in the ass. If this is you, please experiment to find out what setting works best. |`0`|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.broker.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.broker.cache.hosts`|Comma separated list of Memcached hosts `<host:port>`.|none|
|`druid.broker.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.broker.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Command separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|

View File

@ -6,7 +6,7 @@ layout: doc_page
The other way to setup Druid is from source via git. To do so, run these commands:
```
git clone git@github.com:metamx/druid.git
git clone git@github.com:druid-io/druid.git
cd druid
./build.sh
```

View File

@ -4,7 +4,7 @@ layout: doc_page
# Configuring Druid
This describes the basic server configuration that is loaded by all Druid server processes; the same file is loaded by all. See also the JSON "specFile" descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).
This describes the common configuration shared by all Druid nodes. These configurations can be defined in the `common.runtime.properties` file.
## JVM Configuration Best Practices
@ -14,51 +14,17 @@ There are three JVM parameters that we set on all of our processes:
2. `-Dfile.encoding=UTF-8` This is similar to timezone, we test assuming UTF-8. Local encodings might work, but they also might result in weird and interesting bugs.
3. `-Djava.io.tmpdir=<a path>` Various parts of the system that interact with the file system do it via temporary files, and these files can get somewhat large. Many production systems are set up to have small (but fast) `/tmp` directories, which can be problematic with Druid so we recommend pointing the JVMs tmp directory to something with a little more meat.
## Modules
### Extensions
As of Druid v0.6, most core Druid functionality has been compartmentalized into modules. There are a set of default modules that may apply to any node type, and there are specific modules for the different node types. Default modules are __lazily instantiated__. Each module has its own set of configuration.
This page describes the configuration of the default modules. Node-specific configuration is discussed on each node's respective page. In addition, you can add custom modules to [extend Druid](Modules.html).
Configuration of the various modules is done via Java properties. These can either be provided as `-D` system properties on the java command line or they can be passed in via a file called `runtime.properties` that exists on the classpath.
Note: as a future item, wed like to consolidate all of the various configuration into a yaml/JSON based configuration file.
### Emitter Module
The Druid servers emit various metrics and alerts via something we call an Emitter. There are two emitter implementations included with the code, one that just logs to log4j ("logging", which is used by default if no emitter is specified) and one that does POSTs of JSON events to a server ("http"). The properties for using the logging emitter are described below.
Many of Druid's external dependencies can be plugged in as modules. Extensions can be provided using the following configs:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter`|Setting this value to either "logging" or "http" will instantialize one of the emitter modules.|logging|
|`druid.extensions.remoteRepositories`|If this is not set to '[]', Druid will try to download extensions at the specified remote repository.|["http://repo1.maven.org/maven2/","https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]|
|`druid.extensions.localRepository`|The local maven directory where extensions are installed. If this is set, remoteRepositories is not required.|[]|
|`druid.extensions.coordinates`|The list of extensions to include.|[]|
#### Logging Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter.logging.loggerClass`|Choices: HttpPostEmitter, LoggingEmitter, NoopServiceEmitter, ServiceEmitter. The class used for logging.|LoggingEmitter|
|`druid.emitter.logging.logLevel`|Choices: debug, info, warn, error. The log level at which message are logged.|info|
#### Http Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter.http.timeOut`|The timeout for data reads.|PT5M|
|`druid.emitter.http.flushMillis`|How often to internal message buffer is flushed (data is sent).|60000|
|`druid.emitter.http.flushCount`|How many messages can the internal message buffer hold before flushing (sending).|500|
|`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none|
### Http Client Module
This is the HTTP client used by [Broker](Broker.html) nodes.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time nodes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|5|
|`druid.broker.http.readTimeout`|The timeout for data reads.|PT15M|
### Curator Module
### Zookeeper
Druid uses [Curator](http://curator.incubator.apache.org/) for all [Zookeeper](http://zookeeper.apache.org/) interactions.
@ -68,110 +34,36 @@ Druid uses [Curator](http://curator.incubator.apache.org/) for all [Zookeeper](h
|`druid.zk.service.sessionTimeoutMs`|ZooKeeper session timeout, in milliseconds.|30000|
|`druid.curator.compress`|Boolean flag for whether or not created Znodes should be compressed.|false|
### Announcer Module
The announcer module is used to announce and unannounce Znodes in ZooKeeper (using Curator).
#### ZooKeeper Paths
See [ZooKeeper](ZooKeeper.html).
#### Data Segment Announcer
Data segment announcers are used to announce segments.
We recommend just setting the base ZK path, but all ZK paths that Druid uses can be overwritten.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.type`|Choices: legacy or batch. The type of data segment announcer to use.|batch|
|`druid.zk.paths.base`|Base Zookeeper path.|druid|
|`druid.zk.paths.propertiesPath`|Zookeeper properties path.|druid/properties|
|`druid.zk.paths.announcementsPath`|Druid node announcement path.|druid/announcements|
|`druid.zk.paths.liveSegmentsPath`|Current path for where Druid nodes announce their segments.|druid/segments|
|`druid.zk.paths.loadQueuePath`|Entries here cause historical nodes to load and drop segments.|druid/loadQueue|
|`druid.zk.paths.coordinatorPath`|Used by the coordinator for leader election.|druid/coordinator|
|`druid.zk.paths.servedSegmentsPath`|@Deprecated. Legacy path for where Druid nodes announce their segments.|druid/servedSegments|
##### Single Data Segment Announcer
In legacy Druid, each segment served by a node would be announced as an individual Znode.
##### Batch Data Segment Announcer
In current Druid, multiple data segments may be announced under the same Znode.
The indexing service also uses its own set of paths. These configs can be included in the common configuration.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.segmentsPerNode`|Each Znode contains info for up to this many segments.|50|
|`druid.announcer.maxBytesPerNode`|Max byte size for Znode.|524288|
|`druid.zk.paths.indexer.announcementsPath`|Middle managers announce themselves here.|druid/indexer/announcements|
|`druid.zk.paths.indexer.tasksPath`|Used to assign tasks to middle managers.|druid/indexer/tasks|
|`druid.zk.paths.indexer.statusPath`|Parent path for announcement of task statuses.|druid/indexer/status|
|`druid.zk.paths.indexer.leaderLatchPath`|Used for Overlord leader election.|druid/indexer/leaderLatchPath|
### Druid Processing Module
This module contains query processing functionality.
The following path is used service discovery.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|/druid/discovery|
### Request Logging
### Metrics Module
The metrics module is used to track Druid metrics.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.monitoring.emissionPeriod`|How often metrics are emitted.|PT1m|
|`druid.monitoring.monitors`|Sets list of Druid monitors used by a node. Each monitor is specified as `com.metamx.metrics.<monitor-name>` (see below for names and more information). For example, you can specify monitors for a Broker with `druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]`.|none (no monitors)|
The following monitors are available:
* CacheMonitor &ndash; Emits metrics (to logs) about the segment results cache for Historical and Broker nodes. Reports typical cache statistics include hits, misses, rates, and size (bytes and number of entries), as well as timeouts and and errors.
* SysMonitor &ndash; This uses the [SIGAR library](http://www.hyperic.com/products/sigar) to report on various system activities and statuses.
* ServerMonitor &ndash; Reports statistics on Historical nodes.
* JvmMonitor &ndash; Reports JVM-related statistics.
* RealtimeMetricsMonitor &ndash; Reports statistics on Realtime nodes.
### Server Module
This module is used for Druid server nodes.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Storage Node Module
This module is used by nodes that store data (Historical and Realtime).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.maxSize`|The maximum number of bytes-worth of segments that the node wants assigned to it. This is not a limit that Historical nodes actually enforce, just a value published to the Coordinator node so it can plan accordingly.|0|
|`druid.server.tier`| A string to name the distribution tier that the storage node belongs to. Many of the [rules Coordinator nodes use](Rule-Configuration.html) to manage segments can be keyed on tiers. | `_default_tier` |
|`druid.server.priority`|In a tiered architecture, the priority of the tier, thus allowing control over which nodes are queried. Higher numbers mean higher priority. The default (no priority) works for architecture with no cross replication (tiers that have no data-storage overlap). Data centers typically have equal priority. | 0 |
#### Segment Cache
Druid storage nodes maintain information about segments they have already downloaded, and a disk cache to store that data.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Segments assigned to a Historical node are first stored on the local file system (in a disk cache) and then served by the Historical node. These locations define where that local cache resides. | none (no caching) |
|`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true|
|`druid.segmentCache.dropSegmentDelayMillis`|How long a node delays before completely dropping segment.|30000 (30 seconds)|
|`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the Coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir|
|`druid.segmentCache.announceIntervalMillis`|How frequently to announce segments while segments are loading from cache. Set this value to zero to wait for all segments to be loaded before announcing.|5000 (5 seconds)|
|`druid.segmentCache.numLoadingThreads`|How many segments to load concurrently from from deep storage.|1|
### Jetty Server Module
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
### Queryable Module
This module is used by all nodes that can serve queries.
All nodes that can serve queries can also log the requests they see.
|Property|Description|Default|
|--------|-----------|-------|
@ -193,58 +85,54 @@ Every request is emitted to some external location.
|--------|-----------|-------|
|`druid.request.logging.feed`|Feed name for requests.|none|
### Query Runner Factory Module
### Enabling Metrics
This module is required by nodes that can serve queries.
Druid nodes periodically emit metrics and different metrics monitors can be included. Each node can overwrite the default list of monitors.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.chunkPeriod`|Long-interval queries (of any type) may be broken into shorter interval queries, reducing the impact on resources. Use ISO 8601 periods. For example, if this property is set to `P1M` (one month), then a query covering a year would be broken into 12 smaller queries. |0 (off)|
|`druid.monitoring.emissionPeriod`|How often metrics are emitted.|PT1m|
|`druid.monitoring.monitors`|Sets list of Druid monitors used by a node. Each monitor is specified as `com.metamx.metrics.<monitor-name>` (see below for names and more information). For example, you can specify monitors for a Broker with `druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]`.|none (no monitors)|
#### GroupBy Query Config
The following monitors are available:
* CacheMonitor &ndash; Emits metrics (to logs) about the segment results cache for Historical and Broker nodes. Reports typical cache statistics include hits, misses, rates, and size (bytes and number of entries), as well as timeouts and and errors.
* SysMonitor &ndash; This uses the [SIGAR library](http://www.hyperic.com/products/sigar) to report on various system activities and statuses.
* ServerMonitor &ndash; Reports statistics on Historical nodes.
* JvmMonitor &ndash; Reports JVM-related statistics.
* RealtimeMetricsMonitor &ndash; Reports statistics on Realtime nodes.
### Emitting Metrics
The Druid servers emit various metrics and alerts via something we call an Emitter. There are three emitter implementations included with the code, a "noop" emitter, one that just logs to log4j ("logging", which is used by default if no emitter is specified) and one that does POSTs of JSON events to a server ("http"). The properties for using the logging emitter are described below.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
|`druid.emitter`|Setting this value to "noop", "logging", or "http" will instantialize one of the emitter modules.|logging|
#### Search Query Config
#### Logging Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
|`druid.emitter.logging.loggerClass`|Choices: HttpPostEmitter, LoggingEmitter, NoopServiceEmitter, ServiceEmitter. The class used for logging.|LoggingEmitter|
|`druid.emitter.logging.logLevel`|Choices: debug, info, warn, error. The log level at which message are logged.|info|
### Discovery Module
The discovery module is used for service discovery.
#### Http Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|/druid/discovery|
|`druid.emitter.http.timeOut`|The timeout for data reads.|PT5M|
|`druid.emitter.http.flushMillis`|How often to internal message buffer is flushed (data is sent).|60000|
|`druid.emitter.http.flushCount`|How many messages can the internal message buffer hold before flushing (sending).|500|
|`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none|
### Metadata Storage
#### Indexing Service Discovery Module
This module is used to find the [Indexing Service](Indexing-Service.html) using Curator service discovery.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.selectors.indexing.serviceName`|The druid.service name of the indexing service Overlord node. To start the Overlord with a different name, set it with this property. |overlord|
### Server Inventory View Module
This module is used to read announcements of segments in ZooKeeper. The configs are identical to the Announcer Module.
### Database Connector Module
These properties specify the jdbc connection and other configuration around the database. The only processes that connect to the DB with these properties are the [Coordinator](Coordinator.html) and [Indexing service](Indexing-service.html). This is tested on metadata storage.
These properties specify the jdbc connection and other configuration around the metadata storage. The only processes that connect to the metadata storage with these properties are the [Coordinator](Coordinator.html) and [Indexing service](Indexing-service.html).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.metadata.storage.type`|The type of metadata storage to use. Choose from "mysql", "postgres", or "derby".|derby|
|`druid.metadata.storage.connector.user`|The username to connect with.|none|
|`druid.metadata.storage.connector.password`|The password to connect with.|none|
|`druid.metadata.storage.connector.createTables`|If Druid requires a table and it doesn't exist, create it?|true|
@ -258,18 +146,9 @@ These properties specify the jdbc connection and other configuration around the
|`druid.metadata.storage.tables.taskLog`|Used by the indexing service to store task logs.|druid_taskLog|
|`druid.metadata.storage.tables.taskLock`|Used by the indexing service to store task locks.|druid_taskLock|
### Jackson Config Manager Module
### Deep Storage
The Jackson Config manager reads and writes config entries from the Druid config table using [Jackson](http://jackson.codehaus.org/).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.manager.config.pollDuration`|How often the manager polls the config table for updates.|PT1m|
### DataSegment Pusher/Puller Module
This module is used to configure Druid deep storage. The configurations concern how to push and pull [Segments](Segments.html) from deep storage.
The configurations concern how to push and pull [Segments](Segments.html) from deep storage.
|Property|Description|Default|
|--------|-----------|-------|
@ -293,22 +172,14 @@ This deep storage is used to interface with Amazon's S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.s3.accessKey`|The access key to use to access S3.|none|
|`druid.s3.secretKey`|The secret key to use to access S3.|none|
|`druid.storage.bucket`|S3 bucket name.|none|
|`druid.storage.baseKey`|S3 object key prefix for storage.|none|
|`druid.storage.disableAcl`|Boolean flag for ACL.|false|
|`druid.storage.archiveBucket`|S3 bucket name for archiving when running the indexing-service *archive task*.|none|
|`druid.storage.archiveBaseKey`|S3 object key prefix for archiving.|none|
#### AWS Module
This module is used to interact with S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.s3.accessKey`|The access key to use to access S3.|none|
|`druid.s3.secretKey`|The secret key to use to access S3.|none|
#### HDFS Deep Storage
This deep storage is used to interface with HDFS.
@ -326,35 +197,62 @@ This deep storage is used to interface with Cassandra.
|`druid.storage.host`|Cassandra host.|none|
|`druid.storage.keyspace`|Cassandra key space.|none|
### Task Log Module
### Caching
This module is used to configure the [Indexing Service](Indexing-Service.html) task logs.
If you are using a distributed cache such as memcached, you can include the configuration here.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.type`|Choices:noop, s3, file. Where to store task logs|file|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### File Task Logs
Store task logs in the local filesystem.
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|Local filesystem path.|log|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### S3 Task Logs
Store task logs in S3.
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.s3Bucket`|S3 bucket name.|none|
|`druid.indexer.logs.s3Prefix`|S3 key prefix.|none|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Command separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
#### Noop Task Logs
### Indexing Service Discovery
No task logs are actually stored.
This config is used to find the [Indexing Service](Indexing-Service.html) using Curator service discovery. Only required if you are actually running an indexing service.
### Firehose Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.selectors.indexing.serviceName`|The druid.service name of the indexing service Overlord node. To start the Overlord with a different name, set it with this property. |overlord|
The Firehose module lists all available firehoses. There are no configurations.
### Announcing Segments
You can optionally configure how to announce and unannounce Znodes in ZooKeeper (using Curator). For normal operations you do not need to override any of these configs.
#### Data Segment Announcer
Data segment announcers are used to announce segments.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.type`|Choices: legacy or batch. The type of data segment announcer to use.|batch|
##### Single Data Segment Announcer
In legacy Druid, each segment served by a node would be announced as an individual Znode.
##### Batch Data Segment Announcer
In current Druid, multiple data segments may be announced under the same Znode.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.segmentsPerNode`|Each Znode contains info for up to this many segments.|50|
|`druid.announcer.maxBytesPerNode`|Max byte size for Znode.|524288|

View File

@ -8,16 +8,32 @@ For general Coordinator Node information, see [here](Coordinator.html).
Runtime Configuration
---------------------
The coordinator module uses several of the default modules in [Configuration](Configuration.html) and has the following set of configurations as well:
The coordinator node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Node Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Coordinator Operation
|Property|Description|Default|
|--------|-----------|-------|
|`druid.coordinator.period`|The run period for the coordinator. The coordinators operates by maintaining the current state of the world in memory and periodically looking at the set of segments available and segments being served to make decisions about whether any changes need to be made to the data topology. This property sets the delay between each of these runs.|PT60S|
|`druid.coordinator.period.indexingPeriod`|How often to send indexing tasks to the indexing service. Only applies if merge or conversion is turned on.|PT1800S (30 mins)|
|`druid.coordinator.startDelay`|The operation of the Coordinator works on the assumption that it has an up-to-date view of the state of the world when it runs, the current ZK interaction code, however, is written in a way that doesnt allow the Coordinator to know for a fact that its done loading the current state of the world. This delay is a hack to give it enough time to believe that it has all the data.|PT300S|
|`druid.coordinator.merge.on`|Boolean flag for whether or not the coordinator should try and merge small segments into a more optimal segment size.|PT300S|
|`druid.coordinator.merge.on`|Boolean flag for whether or not the coordinator should try and merge small segments into a more optimal segment size.|false|
|`druid.coordinator.conversion.on`|Boolean flag for converting old segment indexing versions to the latest segment indexing version.|false|
|`druid.coordinator.load.timeout`|The timeout duration for when the coordinator assigns a segment to a historical node.|15 minutes|
|`druid.coordinator.load.timeout`|The timeout duration for when the coordinator assigns a segment to a historical node.|PT15M|
### Metadata Retrieval
|Property|Description|Default|
|--------|-----------|-------|
|`druid.manager.config.pollDuration`|How often the manager polls the config table for updates.|PT1m|
|`druid.manager.segment.pollDuration`|The duration between polls the Coordinator does for updates to the set of active segments. Generally defines the amount of lag time it can take for the coordinator to notice new segments.|PT1M|
|`druid.manager.rules.pollDuration`|The duration between polls the Coordinator does for updates to the set of active rules. Generally defines the amount of lag time it can take for the coordinator to notice rules.|PT1M|
|`druid.manager.rules.defaultTier`|The default tier from which default rules will be loaded from.|_default|

View File

@ -4,7 +4,7 @@ layout: doc_page
Examples
========
The examples on this page are setup in order to give you a feel for what Druid does in practice. They are quick demos of Druid based on [CliRealtimeExample](https://github.com/metamx/druid/blob/master/services/src/main/java/io/druid/cli/CliRealtimeExample.java). While you wouldnt run it this way in production you should be able to see how ingestion works and the kind of exploratory queries that are possible. Everything that can be done on your box here can be scaled out to 10s of billions of events and terabytes of data per day in a production cluster while still giving the snappy responsive exploratory queries.
The examples on this page are setup in order to give you a feel for what Druid does in practice. They are quick demos of Druid based on [CliRealtimeExample](https://github.com/druid-io/druid/blob/master/services/src/main/java/io/druid/cli/CliRealtimeExample.java). While you wouldnt run it this way in production you should be able to see how ingestion works and the kind of exploratory queries that are possible. Everything that can be done on your box here can be scaled out to 10s of billions of events and terabytes of data per day in a production cluster while still giving the snappy responsive exploratory queries.
Installing Standalone Druid
---------------------------
@ -16,7 +16,7 @@ There are two options for installing standalone Druid. Building from source, and
Clone Druid and build it:
``` bash
git clone https://github.com/metamx/druid.git druid
git clone https://github.com/druid-io/druid.git druid
cd druid
git fetch --tags
git checkout druid-0.6.160
@ -37,7 +37,7 @@ Twitter Example
For a full tutorial based on the twitter example, check out this [Twitter Tutorial](Twitter-Tutorial.html).
This Example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/metamx/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
This Example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/druid-io/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
### What youll learn
* See how large amounts of data gets ingested into Druid in real-time

View File

@ -5,18 +5,98 @@ Historical Node Configuration
=============================
For general Historical Node information, see [here](Historical.html).
Runtime Configuration
---------------------
The historical module uses several of the default modules in [Configuration](Configuration.html) and has a few configs of its own.
The historical node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
#### Local Cache
### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### General Configuration
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.tier`| A string to name the distribution tier that the storage node belongs to. Many of the [rules Coordinator nodes use](Rule-Configuration.html) to manage segments can be keyed on tiers. | `_default_tier` |
|`druid.server.priority`|In a tiered architecture, the priority of the tier, thus allowing control over which nodes are queried. Higher numbers mean higher priority. The default (no priority) works for architecture with no cross replication (tiers that have no data-storage overlap). Data centers typically have equal priority. | 0 |
### Storing Segments
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Segments assigned to a Historical node are first stored on the local file system (in a disk cache) and then served by the Historical node. These locations define where that local cache resides. | none (no caching) |
|`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true|
|`druid.segmentCache.dropSegmentDelayMillis`|How long a node delays before completely dropping segment.|30000 (30 seconds)|
|`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the Coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir|
|`druid.segmentCache.announceIntervalMillis`|How frequently to announce segments while segments are loading from cache. Set this value to zero to wait for all segments to be loaded before announcing.|5000 (5 seconds)|
|`druid.segmentCache.numLoadingThreads`|How many segments to load concurrently from from deep storage.|1|
### Query Configs
#### Concurrent Requests
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
#### Processing
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
#### General Query Configuration
##### GroupBy Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
##### Search Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
### Caching
You can optionally only configure caching to be enabled on the historical by setting caching configs here.
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.historical.cache.useCache`|`true`,`false`|Allow cache to be used. Cache will NOT be used unless this is set.|`false`|
|`druid.historical.cache.populateCache`|`true`,`false`|Allow cache to be populated. Cache will NOT be populated unless this is set.|`false`|
|`druid.historical.cache.unCacheable`|All druid query types|Do not attempt to cache queries whose types are in this array|`["groupBy","select"]`|
|`druid.historical.cache.numBackgroundThreads`|Non-negative integer|Number of background threads in the thread pool to use for eventual-consistency caching results if caching is used. It is recommended to set this value greater or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Setups who use a Druid backend in programatic settings (sub-second re-querying) should consider setting this to 0 to prevent eventual consistency from biting overall performance in the ass. If this is you, please experiment to find out what setting works best.|`0`|
|`druid.historical.cache.useCache`|Enable the cache on the broker.|false|
|`druid.historical.cache.populateCache`|Populate the cache on the broker.|false|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Command separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|

View File

@ -3,9 +3,54 @@ layout: doc_page
---
For general Indexing Service information, see [here](Indexing-Service.html).
#### Runtime Configuration
## Runtime Configuration
In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord has the following basic configs:
The indexing service uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Must be set on Overlord and Middle Manager
#### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
#### Task Logging
If you are running the indexing service in remote mode, the task logs must S3 or HDFS.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.type`|Choices:noop, s3, hdfs, file. Where to store task logs|file|
##### File Task Logs
Store task logs in the local filesystem.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|Local filesystem path.|log|
##### S3 Task Logs
Store task logs in S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.s3Bucket`|S3 bucket name.|none|
|`druid.indexer.logs.s3Prefix`|S3 key prefix.|none|
##### HDFS Task Logs
Store task logs in HDFS.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|The directory to store logs.|none|
### Overlord Configs
|Property|Description|Default|
|--------|-----------|-------|
@ -23,7 +68,7 @@ The following configs only apply if the overlord is running in remote mode:
|--------|-----------|-------|
|`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task as been assigned to a middle manager before throwing an error.|PT5M|
|`druid.indexer.runner.minWorkerVersion`|The minimum middle manager version to send tasks to. |"0"|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the overlord should expect middle managers to compress Znodes.|false|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the overlord should expect middle managers to compress Znodes.|true|
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
There are additional configs for autoscaling (if it is enabled):
@ -33,9 +78,9 @@ There are additional configs for autoscaling (if it is enabled):
|`druid.indexer.autoscale.strategy`|Choices are "noop" or "ec2". Sets the strategy to run when autoscaling is required.|noop|
|`druid.indexer.autoscale.doAutoscale`|If set to "true" autoscaling will be enabled.|false|
|`druid.indexer.autoscale.provisionPeriod`|How often to check whether or not new middle managers should be added.|PT1M|
|`druid.indexer.autoscale.terminatePeriod`|How often to check when middle managers should be removed.|PT1H|
|`druid.indexer.autoscale.terminatePeriod`|How often to check when middle managers should be removed.|PT5M|
|`druid.indexer.autoscale.originTime`|The starting reference timestamp that the terminate period increments upon.|2012-01-01T00:55:00.000Z|
|`druid.indexer.autoscale.workerIdleTimeout`|How long can a worker be idle (not a run task) before it can be considered for termination.|PT10M|
|`druid.indexer.autoscale.workerIdleTimeout`|How long can a worker be idle (not a run task) before it can be considered for termination.|PT90M|
|`druid.indexer.autoscale.maxScalingDuration`|How long the overlord will wait around for a middle manager to show up before giving up.|PT15M|
|`druid.indexer.autoscale.numEventsToTrack`|The number of autoscaling related events (node creation and termination) to track.|10|
|`druid.indexer.autoscale.pendingTaskTimeout`|How long a task can be in "pending" state before the overlord tries to scale up.|PT30S|
@ -44,43 +89,105 @@ There are additional configs for autoscaling (if it is enabled):
#### Dynamic Configuration
Overlord dynamic configuration is mainly for autoscaling. The overlord reads a worker setup spec as a JSON object from the Druid [metadata storage](Metadata-storage.html) config table. This object contains information about the version of middle managers to create, the maximum and minimum number of middle managers in the cluster at one time, and additional information required to automatically create middle managers.
The overlord can dynamically change worker behavior.
The JSON object can be submitted to the overlord via a POST request at:
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker
```
A sample worker setup spec is shown below:
A sample worker config spec is shown below:
```json
{
"minVersion":"some_version",
"minNumWorkers":"0",
"maxNumWorkers":"10",
"nodeData": {
"selectStrategy": {
"type": "fillCapacityWithAffinity",
"affinityConfig": {
"affinity": {
"datasource1": ["ip1:port", "ip2:port"],
"datasource2": ["ip3:port"]
}
}
},
"autoScaler": {
"type": "ec2",
"amiId":"ami-someId",
"instanceType":"m1.xlarge",
"minInstances":"1",
"maxInstances":"1",
"securityGroupIds":["securityGroupIds"],
"keyName":"keyName"
"minNumWorkers": 2,
"maxNumWorkers": 12,
"envConfig": {
"availabilityZone": "us-east-1a",
"nodeData": {
"amiId": "${AMI}",
"instanceType": "c3.8xlarge",
"minInstances": 1,
"maxInstances": 1,
"securityGroupIds": ["${IDs}"],
"keyName": ${KEY_NAME}
},
"userData": {
"impl": "string",
"data":"version=:VERSION:",
"versionReplacementString":":VERSION:"
"data": "${SCRIPT_COMMAND}",
"versionReplacementString": ":VERSION:",
"version": null
}
}
}
}
```
Issuing a GET request at the same URL will return the current worker setup spec that is currently in place. The worker setup spec list above is just a sample and it is possible to extend the code base for other deployment environments. A description of the worker setup spec is shown below.
Issuing a GET request at the same URL will return the current worker config spec that is currently in place. The worker config spec list above is just a sample for EC2 and it is possible to extend the code base for other deployment environments. A description of the worker config spec is shown below.
|Property|Description|Default|
|--------|-----------|-------|
|`selectStrategy`|How to assign tasks to middlemanagers. Choices are `fillCapacity` and `fillCapacityWithAffinity`.|fillCapacity|
|`autoScaler`|Only used if autoscaling is enabled. See below.|null|
#### Worker Select Strategy
##### Fill Capacity
Workers are assigned tasks until capacity.
|Property|Description|Default|
|--------|-----------|-------|
|`type`|`fillCapacity`.|fillCapacity|
##### Fill Capacity With Affinity
An affinity config can be provided.
|Property|Description|Default|
|--------|-----------|-------|
|`type`|`fillCapacityWithAffinity`.|fillCapacityWithAffinity|
|`affinity`|A map to String to list of String host names.|{}|
Tasks will try to be assigned to preferred workers. Fill capacity strategy is used if no preference for a datasource specified.
#### Autoscaler
Amazon's EC2 is currently the only supported autoscaler.
|Property|Description|Default|
|--------|-----------|-------|
|`minNumWorkers`|The minimum number of workers that can be in the cluster at any given time.|0|
|`maxNumWorkers`|The maximum number of workers that can be in the cluster at any given time.|0|
|`nodeData`|A JSON object that describes how to launch new nodes. Currently, only EC2 is supported.|none; required|
|`userData`|A JSON object that describes how to configure new nodes. Currently, only EC2 is supported. If you have set druid.indexer.autoscale.workerVersion, this must have a versionReplacementString. Otherwise, a versionReplacementString is not necessary.|none; optional|
|`availabilityZone`|What availability zone to run in.|none|
|`nodeData`|A JSON object that describes how to launch new nodes.|none; required|
|`userData`|A JSON object that describes how to configure new nodes. If you have set druid.indexer.autoscale.workerVersion, this must have a versionReplacementString. Otherwise, a versionReplacementString is not necessary.|none; optional|
### MiddleManager Configs
Middle managers pass their configurations down to their child peons. The middle manager requires the following configs:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.worker.ip`|The IP of the worker.|localhost|
|`druid.worker.version`|Version identifier for the middle manager.|0|
|`druid.worker.capacity`|Maximum number of tasks the middle manager can accept.|Number of available processors - 1|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the middle managers should compress Znodes.|false|
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
|`druid.indexer.runner.javaCommand`|Command required to execute java.|java|
|`druid.indexer.runner.javaOpts`|-X Java options to run the peon in its own JVM.|""|
|`druid.indexer.runner.classpath`|Java classpath for the peon.|System.getProperty("java.class.path")|
|`druid.indexer.runner.startPort`|The port that peons begin running on.|8081|
|`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|"com.metamx", "druid", "io.druid", "user.timezone","file.encoding"|

View File

@ -12,14 +12,14 @@ Depending on what `druid.storage.type` is set to, Druid will upload segments to
## My realtime node is not handing segments off
Make sure that the `druid.publish.type` on your real-time nodes is set to `metadata`. Also make sure that `druid.storage.type` is set to a deep storage that makes sense. Some example configs:
Make sure that the `druid.publish.type` on your real-time nodes is set to "metadata". Also make sure that `druid.storage.type` is set to a deep storage that makes sense. Some example configs:
```
druid.publish.type=metadata
druid.publish.type=db
druid.metadata.storage.connector.connectURI=jdbc\:metadata storage\://localhost\:3306/druid
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
druid.storage.type=s3
druid.storage.bucket=druid

View File

@ -13,7 +13,7 @@ The following can be configured on the plumber:
* `windowPeriod` is the amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.
* `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.
* `maxPendingPersists` is how many persists a plumber can do concurrently without starting to block.
* `maxPendingPersists` is the maximum number of persists that can be pending, but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes.
* `segmentGranularity` specifies the granularity of the segment, or the amount of time a segment will represent.
* `rejectionPolicy` controls how data sets the data acceptance policy for creating and handing off segments. The following policies are available:
* `serverTime` &ndash; The recommended policy for "current time" data, it is optimal for current data that is generated and ingested in real time. Uses `windowPeriod` to accept only those events that are inside the window looking forward and back.

View File

@ -2,118 +2,62 @@
layout: doc_page
---
Realtime Node Configuration
===========================
For general Real-time Node information, see [here](Realtime.html).
==============================
For general Realtime Node information, see [here](Realtime.html).
For Real-time Ingestion, see [Realtime Ingestion](Realtime-ingestion.html).
Runtime Configuration
---------------------
Quick Start
-----------
Run:
The realtime node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
```
io.druid.cli.Main server realtime
```
### Node Config
With the following JVM configuration:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
### Realtime Operation
druid.host=localhost
druid.service=realtime
druid.port=8083
|Property|Description|Default|
|--------|-----------|-------|
|`druid.publish.type`|Where to publish segments. Choices are "noop" or "metadata".|metadata|
|`druid.realtime.specFile`|File location of realtime specFile.|none|
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.160"]
### Storing Intermediate Segments
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Where intermediate segments are stored. The maxSize should always be zero.|none|
druid.zk.service.host=localhost
### Query Configs
# The realtime config file.
druid.realtime.specFile=/path/to/specFile
#### Processing
# Choices: metadata (hand off segments), noop (do not hand off segments).
druid.publish.type=metadata
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
druid.metadata.storage.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
#### General Query Configuration
druid.processing.buffer.sizeBytes=100000000
```
##### GroupBy Query Config
Production Configs
------------------
These production configs are using S3 as a deep store.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
JVM settings:
##### Search Query Config
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/realtime
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.160","io.druid.extensions:druid-kafka-seven:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.metadata.storage.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.metadata.storage.connector.user=#{MYSQL_USER}
druid.metadata.storage.connector.password=#{MYSQL_PW}
druid.metadata.storage.connector.useValidationQuery=true
druid.metadata.storage.tables.base=prod
druid.publish.type=metadata
druid.processing.numThreads=3
druid.request.logging.type=file
druid.request.logging.dir=request_logs/
druid.realtime.specFile=conf/schemas.json
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
druid.storage.type=s3
druid.storage.bucket=#{S3_STORAGE_BUCKET}
druid.storage.baseKey=prod-realtime/v1
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "io.druid.segment.realtime.RealtimeMetricsMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
The realtime module also uses several of the default modules in [Configuration](Configuration.html). For more information on the realtime spec file (or configuration file), see [realtime ingestion](Realtime-ingestion.html) page.

View File

@ -134,13 +134,13 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
|Field|Type|Description|Required|
|-----|----|-----------|--------|
|type|String|This should always be 'realtime'.|no|
|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size.|no (default == 5 million)|
|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 5 million)|
|windowPeriod|ISO 8601 Period String|The amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.|no (default == PT10m)|
|intermediatePersistPeriod|ISO8601 Period String|The period that determines the rate at which intermediate persists occur. These persists determine how often commits happen against the incoming realtime stream. If the realtime data loading process is interrupted at time T, it should be restarted to re-read data that arrived at T minus this period.|no (default == PT10m)|
|basePersistDirectory|String|The directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.|no (default == java tmp dir)|
|versioningPolicy|Object|How to version segments.|no (default == based on segment start time)|
|rejectionPolicy|Object|Controls how data sets the data acceptance policy for creating and handing off segments. More on this below.|no (default=='serverTime')|
|maxPendingPersists|Integer|How many persists a plumber can do concurrently without starting to block.|no (default == 0)|
|maxPendingPersists|Integer|Maximum number of persists that can be pending, but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 0; meaning one persist can be running concurrently with ingestion, and none can be queued up)|
|shardSpec|Object|This describes the shard that is represented by this server. This must be specified properly in order to have multiple realtime nodes indexing the same data stream in a sharded fashion.|no (default == 'NoneShardSpec'|
#### Rejection Policy

View File

@ -37,7 +37,7 @@ Extending the code
Realtime integration is intended to be extended in two ways:
1. Connect to data streams from varied systems ([Firehose](https://github.com/druid-io/druid-api/blob/master/src/main/java/io/druid/data/input/FirehoseFactory.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/metamx/druid/blob/master/server/src/main/java/io/druid/segment/realtime/plumber/PlumberSchool.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/druid-io/druid/blob/master/server/src/main/java/io/druid/segment/realtime/plumber/PlumberSchool.java))
The expectations are that the former will be very common and something that users of Druid will do on a fairly regular basis. Most users will probably never have to deal with the latter form of customization. Indeed, we hope that all potential use cases can be packaged up as part of Druid proper without requiring proprietary customization.

View File

@ -19,7 +19,7 @@ SSDs are highly recommended for historical and real-time nodes if you are not ru
# Provide Columns Names in Lexicographic Order
Although Druid supports schema-less ingestion of dimensions, because of [https://github.com/metamx/druid/issues/658](https://github.com/metamx/druid/issues/658), you may sometimes get bigger segments than necessary. To ensure segments are as compact as possible, providing dimension names in lexicographic order is recommended.
Although Druid supports schema-less ingestion of dimensions, because of [https://github.com/druid-io/druid/issues/658](https://github.com/druid-io/druid/issues/658), you may sometimes get bigger segments than necessary. To ensure segments are as compact as possible, providing dimension names in lexicographic order is recommended.
# Use Timeseries and TopN Queries Instead of GroupBy Where Possible

View File

@ -110,7 +110,7 @@ This just walks through getting the relevant software installed and running. Yo
1. Clone the git repository for druid, checkout a "stable" tag and build
git clone https://github.com/metamx/druid.git druid
git clone https://github.com/druid-io/druid.git druid
pushd druid
git checkout druid-0.4.12
export LANGUAGE=C

View File

@ -1,4 +1,31 @@
{
"queryType": "timeBoundary",
"dataSource": "twitterstream"
"description": "Simple data split up by hour",
"aggregations": [
{
"name": "tweets",
"type": "longSum",
"fieldName": "tweets"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htag_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
}
],
"dataSource": "twitterstream",
"granularity": "hour",
"intervals": [
"1970-01-01T00:00:00.000/2019-01-03T00:00:00.000"
],
"queryType": "timeseries"
}

View File

@ -0,0 +1,94 @@
{
"description": "Top 10 languages by count of tweets in the contiguous US",
"aggregations": [
{
"fieldName": "tweets",
"name": "tweets",
"type": "longSum"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors_hll",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "min_follower_count",
"name": "min_follower_count",
"type": "min"
},
{
"fieldName": "max_follower_count",
"name": "max_follower_count",
"type": "max"
},
{
"fieldName": "min_friends_count",
"name": "min_friends_count",
"type": "min"
},
{
"fieldName": "max_friends_count",
"name": "max_friends_count",
"type": "max"
},
{
"fieldName": "min_statuses_count",
"name": "min_statuses_count",
"type": "min"
},
{
"fieldName": "max_statuses_count",
"name": "max_statuses_count",
"type": "max"
},
{
"fieldName": "min_retweet_count",
"name": "min_retweet_count",
"type": "min"
},
{
"fieldName": "max_retweet_count",
"name": "max_retweet_count",
"type": "max"
}
],
"dataSource": "twitterstream",
"dimension": "lang",
"filter": {
"bound": {
"maxCoords": [
50,
-65
],
"minCoords": [
25,
-127
],
"type": "rectangular"
},
"dimension": "geo",
"type": "spatial"
},
"granularity": "all",
"intervals": [
"2013-06-01T00:00/2020-01-01T00"
],
"metric": "tweets",
"queryType": "topN",
"threshold": "10"
}

View File

@ -1,119 +1,151 @@
[
{
"description": "Ingestion spec for Twitter spritzer. Dimension values taken from io.druid.examples.twitter.TwitterSpritzerFirehoseFactory",
"spec": {
"dataSchema": {
"dataSource": "twitterstream",
"parser": {
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "utcdt",
"format": "iso"
},
"dimensionsSpec": {
"dimensions": [
],
"dimensionExclusions": [
],
"spatialDimensions": [
]
}
}
"granularitySpec": {
"queryGranularity": "all",
"segmentGranularity": "hour",
"type": "uniform"
},
"metricsSpec": [
{
"type": "count",
"name": "tweets"
"name": "tweets",
"type": "count"
},
{
"type": "doubleSum",
"fieldName": "follower_count",
"name": "total_follower_count"
"name": "total_follower_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "retweet_count",
"name": "total_retweet_count"
"name": "total_retweet_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "friends_count",
"name": "total_friends_count"
"name": "total_friends_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "statuses_count",
"name": "total_statuses_count"
"name": "total_statuses_count",
"type": "doubleSum"
},
{
"fieldName": "text",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"type": "min",
"fieldName": "follower_count",
"name": "min_follower_count"
"name": "min_follower_count",
"type": "min"
},
{
"type": "max",
"fieldName": "follower_count",
"name": "max_follower_count"
"name": "max_follower_count",
"type": "max"
},
{
"type": "min",
"fieldName": "friends_count",
"name": "min_friends_count"
"name": "min_friends_count",
"type": "min"
},
{
"type": "max",
"fieldName": "friends_count",
"name": "max_friends_count"
"name": "max_friends_count",
"type": "max"
},
{
"type": "min",
"fieldName": "statuses_count",
"name": "min_statuses_count"
"name": "min_statuses_count",
"type": "min"
},
{
"type": "max",
"fieldName": "statuses_count",
"name": "max_statuses_count"
"name": "max_statuses_count",
"type": "max"
},
{
"type": "min",
"fieldName": "retweet_count",
"name": "min_retweet_count"
"name": "min_retweet_count",
"type": "min"
},
{
"type": "max",
"fieldName": "retweet_count",
"name": "max_retweet_count"
"name": "max_retweet_count",
"type": "max"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": "NONE"
"parser": {
"parseSpec": {
"dimensionsSpec": {
"dimensions": [
"text",
"htags",
"contributors",
"lat",
"lon",
"retweet_count",
"follower_count",
"friendscount",
"lang",
"utc_offset",
"statuses_count",
"user_id",
"ts"
],
"dimensionExclusions": [
],
"spatialDimensions": [
{
"dimName": "geo",
"dims": [
"lat",
"lon"
]
}
]
},
"format": "json",
"timestampSpec": {
"column": "ts",
"format": "millis"
}
}
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "twitzer",
"maxEventCount": 500000,
"maxRunMinutes": 120
"maxRunMinutes": 120,
"type": "twitzer"
},
"plumber": {
"type": "realtime"
}
},
"tuningConfig": {
"type": "realtime",
"intermediatePersistPeriod": "PT10m",
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT2m",
"windowPeriod": "PT3m",
"basePersistDirectory": "\/tmp\/realtime\/basePersist",
"rejectionPolicy": {
"type": "messageTime"
"type": "realtime",
"windowPeriod": "PT10m"
}
},
"type": "index_realtime"
}
}
]

View File

@ -1,6 +1,6 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@ -22,6 +22,8 @@ package io.druid.examples.twitter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.metamx.common.logger.Logger;
import io.druid.data.input.Firehose;
@ -30,6 +32,7 @@ import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.InputRowParser;
import twitter4j.ConnectionLifeCycleListener;
import twitter4j.GeoLocation;
import twitter4j.HashtagEntity;
import twitter4j.StallWarning;
import twitter4j.Status;
@ -39,12 +42,13 @@ import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.User;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
@ -58,25 +62,27 @@ import static java.lang.Thread.sleep;
* with timestamps along with ??.
* The generated tuples have the form (timestamp, ????)
* where the timestamp is from the twitter event.
*
* <p/>
* Example spec file:
*
* <p/>
* Example query using POST to /druid/v2/?w (where w is an arbitrary parameter and the date and time
* is UTC):
*
* <p/>
* Notes on twitter.com HTTP (REST) API: v1.0 will be disabled around 2013-03 so v1.1 should be used;
* twitter4j 3.0 (not yet released) will support the v1.1 api.
* Specifically, we should be using https://stream.twitter.com/1.1/statuses/sample.json
* See: http://jira.twitter4j.org/browse/TFJ-186
*
* <p/>
* Notes on JSON parsing: as of twitter4j 2.2.x, the json parser has some bugs (ex: Status.toString()
* can have number format exceptions), so it might be necessary to extract raw json and process it
* separately. If so, set twitter4.jsonStoreEnabled=true and look at DataObjectFactory#getRawJSON();
* com.fasterxml.jackson.databind.ObjectMapper should be used to parse.
*
* @author pbaclace
*/
@JsonTypeName("twitzer")
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowParser> {
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowParser>
{
private static final Logger log = new Logger(TwitterSpritzerFirehoseFactory.class);
/**
* max events to receive, -1 is infinite, 0 means nothing is delivered; use this to prevent
@ -107,7 +113,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
@Override
public Firehose connect(InputRowParser parser) throws IOException
{
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener()
{
@Override
public void onConnect()
{
@ -135,19 +142,15 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
final int QUEUE_SIZE = 2000;
/** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
final LinkedList<String> dimensions = new LinkedList<String>();
final long startMsec = System.currentTimeMillis();
dimensions.add("htags");
dimensions.add("lang");
dimensions.add("utc_offset");
//
// set up Twitter Spritzer
//
twitterStream = new TwitterStreamFactory().getInstance();
twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j
statusListener = new StatusListener()
{ // This is what really gets called to deliver stuff from twitter4j
@Override
public void onStatus(Status status)
{
@ -160,7 +163,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
if (!success) {
log.warn("queue too slow!");
}
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
}
@ -192,7 +196,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
}
@Override
public void onStallWarning(StallWarning warning) {
public void onStallWarning(StallWarning warning)
{
System.out.println("Got stall warning:" + warning);
}
};
@ -201,31 +206,33 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
twitterStream.sample(); // creates a generic StatusStream
log.info("returned from sample()");
return new Firehose() {
return new Firehose()
{
private final Runnable doNothingRunnable = new Runnable() {
private final Runnable doNothingRunnable = new Runnable()
{
public void run()
{
}
};
private long rowCount = 0L;
private boolean waitIfmax = (maxEventCount < 0L);
private final Map<String, Object> theMap = new HashMap<String, Object>(2);
private boolean waitIfmax = (getMaxEventCount() < 0L);
private final Map<String, Object> theMap = new TreeMap<>();
// DIY json parsing // private final ObjectMapper omapper = new ObjectMapper();
private boolean maxTimeReached()
{
if (maxRunMinutes <= 0) {
if (getMaxRunMinutes() <= 0) {
return false;
} else {
return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes;
return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes();
}
}
private boolean maxCountReached()
{
return maxEventCount >= 0 && rowCount >= maxEventCount;
return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount();
}
@Override
@ -253,7 +260,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
try {
log.info("reached limit, sleeping a long time...");
sleep(2000000000L);
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
} else {
@ -267,30 +275,60 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
Status status;
try {
status = queue.take();
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
HashtagEntity[] hts = status.getHashtagEntities();
if (hts != null && hts.length > 0) {
List<String> hashTags = Lists.newArrayListWithExpectedSize(hts.length);
for (HashtagEntity ht : hts) {
hashTags.add(ht.getText());
String text = status.getText();
theMap.put("text", (null == text) ? "" : text);
theMap.put(
"htags", (hts.length > 0) ? Lists.transform(
Arrays.asList(hts), new Function<HashtagEntity, String>()
{
@Nullable
@Override
public String apply(HashtagEntity input)
{
return input.getText();
}
}
) : ImmutableList.<String>of()
);
theMap.put("htags", Arrays.asList(hashTags.get(0)));
long[] lcontrobutors = status.getContributors();
List<String> contributors = new ArrayList<>();
for (long contrib : lcontrobutors) {
contributors.add(String.format("%d", contrib));
}
theMap.put("contributors", contributors);
GeoLocation geoLocation = status.getGeoLocation();
if (null != geoLocation) {
double lat = status.getGeoLocation().getLatitude();
double lon = status.getGeoLocation().getLongitude();
theMap.put("lat", lat);
theMap.put("lon", lon);
} else {
theMap.put("lat", null);
theMap.put("lon", null);
}
long retweetCount = status.getRetweetCount();
theMap.put("retweet_count", retweetCount);
User user = status.getUser();
if (user != null) {
theMap.put("follower_count", user.getFollowersCount());
theMap.put("friends_count", user.getFriendsCount());
theMap.put("lang", user.getLang());
theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available?
theMap.put("statuses_count", user.getStatusesCount());
}
final boolean hasUser = (null != user);
theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0);
theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0);
theMap.put("lang", hasUser ? user.getLang() : "");
theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available?
theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0);
theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : "");
theMap.put("ts",status.getCreatedAt().getTime());
List<String> dimensions = Lists.newArrayList(theMap.keySet());
return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
}
@ -311,4 +349,15 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
};
}
@JsonProperty
public int getMaxEventCount()
{
return maxEventCount;
}
@JsonProperty
public int getMaxRunMinutes()
{
return maxRunMinutes;
}
}

View File

@ -56,7 +56,7 @@ public class HdfsTaskLogs implements TaskLogs
final Path path = getTaskLogFileFromId(taskId);
log.info("Writing task log to: %s", path);
Configuration conf = new Configuration();
final FileSystem fs = FileSystem.get(conf);
final FileSystem fs = path.getFileSystem(conf);
FileUtil.copy(logFile, fs, path, false, conf);
log.info("Wrote task log to: %s", path);
}
@ -65,7 +65,7 @@ public class HdfsTaskLogs implements TaskLogs
public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException
{
final Path path = getTaskLogFileFromId(taskId);
final FileSystem fs = FileSystem.get(new Configuration());
final FileSystem fs = path.getFileSystem(new Configuration());
if (fs.exists(path)) {
return Optional.<ByteSource>of(
new ByteSource()

View File

@ -18,6 +18,7 @@
*/
package io.druid.storage.hdfs.tasklog;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import javax.validation.constraints.NotNull;
@ -27,12 +28,12 @@ import javax.validation.constraints.NotNull;
*/
public class HdfsTaskLogsConfig
{
@JsonProperty
@NotNull
private String directory;
public HdfsTaskLogsConfig(String directory)
@JsonCreator
public HdfsTaskLogsConfig(@JsonProperty("directory") String directory)
{
this.directory = directory;
}

View File

@ -76,6 +76,11 @@
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>

View File

@ -19,7 +19,13 @@
package io.druid.storage.s3;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.auth.SystemPropertiesCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.fasterxml.jackson.databind.Module;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
@ -62,14 +68,18 @@ public class S3StorageDruidModule implements DruidModule
binder.bind(S3TaskLogs.class).in(LazySingleton.class);
}
@Provides
@LazySingleton
public AWSCredentialsProvider getAWSCredentialsProvider(final AWSCredentialsConfig config)
private static class ConfigDrivenAwsCredentialsConfigProvider implements AWSCredentialsProvider
{
private AWSCredentialsConfig config;
public ConfigDrivenAwsCredentialsConfigProvider(AWSCredentialsConfig config) {
this.config = config;
}
@Override
public com.amazonaws.auth.AWSCredentials getCredentials()
{
if (!Strings.isNullOrEmpty(config.getAccessKey()) && !Strings.isNullOrEmpty(config.getSecretKey())) {
return new AWSCredentialsProvider() {
@Override
public com.amazonaws.auth.AWSCredentials getCredentials() {
return new com.amazonaws.auth.AWSCredentials() {
@Override
public String getAWSAccessKeyId() {
@ -82,13 +92,56 @@ public class S3StorageDruidModule implements DruidModule
}
};
}
throw new AmazonClientException("Unable to load AWS credentials from druid AWSCredentialsConfig");
}
@Override
public void refresh() {}
};
} else {
return new FileSessionCredentialsProvider(config.getFileSessionCredentials());
}
private static class LazyFileSessionCredentialsProvider implements AWSCredentialsProvider
{
private AWSCredentialsConfig config;
private FileSessionCredentialsProvider provider;
public LazyFileSessionCredentialsProvider(AWSCredentialsConfig config) {
this.config = config;
}
private FileSessionCredentialsProvider getUnderlyingProvider() {
if (provider == null) {
synchronized (config) {
if (provider == null) {
provider = new FileSessionCredentialsProvider(config.getFileSessionCredentials());
}
}
}
return provider;
}
@Override
public com.amazonaws.auth.AWSCredentials getCredentials()
{
return getUnderlyingProvider().getCredentials();
}
@Override
public void refresh() {
getUnderlyingProvider().refresh();
}
}
@Provides
@LazySingleton
public AWSCredentialsProvider getAWSCredentialsProvider(final AWSCredentialsConfig config)
{
return new AWSCredentialsProviderChain(
new ConfigDrivenAwsCredentialsConfigProvider(config),
new LazyFileSessionCredentialsProvider(config),
new EnvironmentVariableCredentialsProvider(),
new SystemPropertiesCredentialsProvider(),
new ProfileCredentialsProvider(),
new InstanceProfileCredentialsProvider());
}
@Provides

View File

@ -67,6 +67,7 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
@ -83,10 +84,15 @@ public class HadoopDruidIndexerConfig
public static final Joiner tabJoiner = Joiner.on("\t");
public static final ObjectMapper jsonMapper;
// workaround to pass down druid.processing.bitmap.type, see IndexGeneratorJob.run()
protected static final Properties properties;
private static final String DEFAULT_WORKING_PATH = "/tmp/druid-indexing";
static {
injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Object>of(
ImmutableList.<Module>of(
new Module()
{
@Override
@ -100,6 +106,7 @@ public class HadoopDruidIndexerConfig
)
);
jsonMapper = injector.getInstance(ObjectMapper.class);
properties = injector.getInstance(Properties.class);
}
public static enum IndexJobCounters
@ -333,7 +340,11 @@ public class HadoopDruidIndexerConfig
return Optional.absent();
}
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart()).getShardSpec(rollupGran.truncate(inputRow.getTimestampFromEpoch()), inputRow);
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart())
.getShardSpec(
rollupGran.truncate(inputRow.getTimestampFromEpoch()),
inputRow
);
final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(actualSpec);
return Optional.of(
@ -403,6 +414,12 @@ public class HadoopDruidIndexerConfig
return schema.getTuningConfig().isPersistInHeap();
}
public String getWorkingPath()
{
final String workingPath = schema.getTuningConfig().getWorkingPath();
return workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
}
/******************************************
Path helper logic
******************************************/
@ -418,7 +435,7 @@ public class HadoopDruidIndexerConfig
return new Path(
String.format(
"%s/%s/%s",
schema.getTuningConfig().getWorkingPath(),
getWorkingPath(),
schema.getDataSchema().getDataSource(),
schema.getTuningConfig().getVersion().replace(":", "")
)

View File

@ -77,7 +77,9 @@ public class HadoopDruidIndexerJob implements Jobby
if (metadataStorageUpdaterJob != null) {
jobs.add(metadataStorageUpdaterJob);
} else {
log.info("No updaterJobSpec set, not uploading to database");
log.info(
"No metadataStorageUpdaterJob set in the config. This is cool if you are running a hadoop index task, otherwise nothing will be uploaded to database."
);
}
jobs.add(

View File

@ -36,7 +36,6 @@ import java.util.Map;
@JsonTypeName("hadoop")
public class HadoopTuningConfig implements TuningConfig
{
private static final String DEFAULT_WORKING_PATH = "/tmp/druid-indexing";
private static final PartitionsSpec DEFAULT_PARTITIONS_SPEC = HashedPartitionsSpec.makeDefaultHashedPartitionsSpec();
private static final Map<DateTime, List<HadoopyShardSpec>> DEFAULT_SHARD_SPECS = ImmutableMap.<DateTime, List<HadoopyShardSpec>>of();
private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 80000;
@ -46,7 +45,7 @@ public class HadoopTuningConfig implements TuningConfig
public static HadoopTuningConfig makeDefaultTuningConfig()
{
return new HadoopTuningConfig(
DEFAULT_WORKING_PATH,
null,
new DateTime().toString(),
DEFAULT_PARTITIONS_SPEC,
DEFAULT_SHARD_SPECS,
@ -99,7 +98,7 @@ public class HadoopTuningConfig implements TuningConfig
final @JsonProperty("aggregationBufferRatio") Float aggregationBufferRatio
)
{
this.workingPath = workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
this.workingPath = workingPath;
this.version = version == null ? new DateTime().toString() : version;
this.partitionsSpec = partitionsSpec == null ? DEFAULT_PARTITIONS_SPEC : partitionsSpec;
this.shardSpecs = shardSpecs == null ? DEFAULT_SHARD_SPECS : shardSpecs;

View File

@ -21,6 +21,7 @@ package io.druid.indexer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@ -44,6 +45,8 @@ import io.druid.segment.LoggingProgressIndicator;
import io.druid.segment.ProgressIndicator;
import io.druid.segment.QueryableIndex;
import io.druid.segment.SegmentUtils;
import io.druid.segment.data.BitmapSerde;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.segment.incremental.OffheapIncrementalIndex;
@ -180,6 +183,19 @@ public class IndexGeneratorJob implements Jobby
config.addInputPaths(job);
config.addJobProperties(job);
// hack to get druid.processing.bitmap property passed down to hadoop job.
// once IndexIO doesn't rely on globally injected properties, we can move this into the HadoopTuningConfig.
final String bitmapProperty = "druid.processing.bitmap.type";
final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty(bitmapProperty);
if(bitmapType != null) {
for(String property : new String[] {"mapreduce.reduce.java.opts", "mapreduce.map.java.opts"}) {
// prepend property to allow overriding using hadoop.xxx properties by JobHelper.injectSystemProperties above
String value = Strings.nullToEmpty(job.getConfiguration().get(property));
job.getConfiguration().set(property, String.format("-D%s=%s %s", bitmapProperty, bitmapType, value));
}
}
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);

View File

@ -63,7 +63,7 @@ public class JobHelper
final Configuration conf = groupByJob.getConfiguration();
final FileSystem fs = FileSystem.get(conf);
Path distributedClassPath = new Path(config.getSchema().getTuningConfig().getWorkingPath(), "classpath");
Path distributedClassPath = new Path(config.getWorkingPath(), "classpath");
if (fs instanceof LocalFileSystem) {
return;

View File

@ -22,6 +22,7 @@ package io.druid.guice;
import com.google.inject.Binder;
import io.druid.indexing.overlord.config.ForkingTaskRunnerConfig;
import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.server.initialization.IndexerZkConfig;
/**
*/
@ -31,5 +32,6 @@ public class IndexingServiceModuleHelper
{
JsonConfigProvider.bind(binder, "druid.indexer.runner", ForkingTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, "druid.indexer.runner", RemoteTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, "druid.zk.paths.indexer", IndexerZkConfig.class);
}
}

View File

@ -111,13 +111,11 @@ public class HadoopIndexTask extends AbstractTask
this.spec = spec;
// Some HadoopIngestionSpec stuff doesn't make sense in the context of the indexing service
if (this.spec.getTuningConfig().getWorkingPath() != null) {
log.error("workingPath should be absent in your spec! Ignoring");
}
Preconditions.checkArgument(
this.spec.getIOConfig().getSegmentOutputPath() == null,
"segmentOutputPath must be absent"
);
Preconditions.checkArgument(this.spec.getTuningConfig().getWorkingPath() == null, "workingPath must be absent");
Preconditions.checkArgument(
this.spec.getIOConfig().getMetadataUpdateSpec() == null,
"metadataUpdateSpec must be absent"

View File

@ -32,8 +32,6 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.io.InputSupplier;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
@ -53,6 +51,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.overlord.setup.WorkerSelectStrategy;
import io.druid.indexing.worker.TaskAnnouncement;
import io.druid.indexing.worker.Worker;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import io.druid.tasklogs.TaskLogStreamer;
import org.apache.commons.lang.mutable.MutableInt;
@ -104,7 +103,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
private final ObjectMapper jsonMapper;
private final RemoteTaskRunnerConfig config;
private final ZkPathsConfig zkPaths;
private final IndexerZkConfig indexerZkConfig;
private final CuratorFramework cf;
private final PathChildrenCacheFactory pathChildrenCacheFactory;
private final PathChildrenCache workerPathCache;
@ -131,7 +130,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
public RemoteTaskRunner(
ObjectMapper jsonMapper,
RemoteTaskRunnerConfig config,
ZkPathsConfig zkPaths,
IndexerZkConfig indexerZkConfig,
CuratorFramework cf,
PathChildrenCacheFactory pathChildrenCacheFactory,
HttpClient httpClient,
@ -140,10 +139,10 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
{
this.jsonMapper = jsonMapper;
this.config = config;
this.zkPaths = zkPaths;
this.indexerZkConfig = indexerZkConfig;
this.cf = cf;
this.pathChildrenCacheFactory = pathChildrenCacheFactory;
this.workerPathCache = pathChildrenCacheFactory.make(cf, zkPaths.getIndexerAnnouncementPath());
this.workerPathCache = pathChildrenCacheFactory.make(cf, indexerZkConfig.getAnnouncementsPath());
this.httpClient = httpClient;
this.strategy = strategy;
}
@ -498,7 +497,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
} else {
final String workerId = worker.getHost();
log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
final String statusPath = JOINER.join(zkPaths.getIndexerStatusPath(), workerId, taskId);
final String statusPath = JOINER.join(indexerZkConfig.getStatus(), workerId, taskId);
try {
cf.delete().guaranteed().forPath(statusPath);
}
@ -584,7 +583,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
throw new ISE("Length of raw bytes for task too large[%,d > %,d]", rawBytes.length, config.getMaxZnodeBytes());
}
String taskPath = JOINER.join(zkPaths.getIndexerTaskPath(), theWorker.getHost(), task.getId());
String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), theWorker.getHost(), task.getId());
if (cf.checkExists().forPath(taskPath) == null) {
cf.create()
@ -644,7 +643,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
log.info("Worker[%s] reportin' for duty!", worker.getHost());
try {
final String workerStatusPath = JOINER.join(zkPaths.getIndexerStatusPath(), worker.getHost());
final String workerStatusPath = JOINER.join(indexerZkConfig.getStatus(), worker.getHost());
final PathChildrenCache statusCache = pathChildrenCacheFactory.make(cf, workerStatusPath);
final SettableFuture<ZkWorker> retVal = SettableFuture.create();
final ZkWorker zkWorker = new ZkWorker(
@ -789,7 +788,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
if (zkWorker != null) {
try {
List<String> tasksToFail = Lists.newArrayList(
cf.getChildren().forPath(JOINER.join(zkPaths.getIndexerTaskPath(), worker.getHost()))
cf.getChildren().forPath(JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost()))
);
log.info("[%s]: Found %d tasks assigned", worker.getHost(), tasksToFail.size());
@ -807,7 +806,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
for (String assignedTask : tasksToFail) {
RemoteTaskRunnerWorkItem taskRunnerWorkItem = runningTasks.remove(assignedTask);
if (taskRunnerWorkItem != null) {
String taskPath = JOINER.join(zkPaths.getIndexerTaskPath(), worker.getHost(), assignedTask);
String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost(), assignedTask);
if (cf.checkExists().forPath(taskPath) != null) {
cf.delete().guaranteed().forPath(taskPath);
}

View File

@ -29,6 +29,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy;
import io.druid.indexing.overlord.setup.WorkerBehaviorConfig;
import io.druid.indexing.overlord.setup.WorkerSelectStrategy;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
@ -38,7 +39,7 @@ public class RemoteTaskRunnerFactory implements TaskRunnerFactory
{
private final CuratorFramework curator;
private final RemoteTaskRunnerConfig remoteTaskRunnerConfig;
private final ZkPathsConfig zkPaths;
private final IndexerZkConfig zkPaths;
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final WorkerSelectStrategy strategy;
@ -47,7 +48,7 @@ public class RemoteTaskRunnerFactory implements TaskRunnerFactory
public RemoteTaskRunnerFactory(
final CuratorFramework curator,
final RemoteTaskRunnerConfig remoteTaskRunnerConfig,
final ZkPathsConfig zkPaths,
final IndexerZkConfig zkPaths,
final ObjectMapper jsonMapper,
@Global final HttpClient httpClient,
final Supplier<WorkerBehaviorConfig> workerBehaviourConfigSupplier

View File

@ -38,6 +38,7 @@ import io.druid.indexing.overlord.config.TaskQueueConfig;
import io.druid.indexing.overlord.autoscaling.ResourceManagementScheduler;
import io.druid.indexing.overlord.autoscaling.ResourceManagementSchedulerFactory;
import io.druid.server.DruidNode;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
@ -74,7 +75,7 @@ public class TaskMaster
final TaskStorage taskStorage,
final TaskActionClientFactory taskActionClientFactory,
@Self final DruidNode node,
final ZkPathsConfig zkPaths,
final IndexerZkConfig zkPaths,
final TaskRunnerFactory runnerFactory,
final ResourceManagementSchedulerFactory managementSchedulerFactory,
final CuratorFramework curator,
@ -85,7 +86,7 @@ public class TaskMaster
this.taskActionClientFactory = taskActionClientFactory;
this.leaderSelector = new LeaderSelector(
curator,
zkPaths.getIndexerLeaderLatchPath(),
zkPaths.getLeaderLatchPath(),
new LeaderSelectorListener()
{
@Override

View File

@ -19,6 +19,7 @@
package io.druid.indexing.overlord;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
@ -40,13 +41,17 @@ import io.druid.indexing.common.task.Task;
import io.druid.query.NoopQueryRunner;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.query.QueryRunnerFactoryConglomerate;
import io.druid.query.QuerySegmentWalker;
import io.druid.query.SegmentDescriptor;
import io.druid.query.UnionQueryRunner;
import org.apache.commons.io.FileUtils;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListSet;
@ -59,16 +64,18 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
private final TaskToolboxFactory toolboxFactory;
private final ListeningExecutorService exec;
private final Set<ThreadPoolTaskRunnerWorkItem> runningItems = new ConcurrentSkipListSet<>();
private final QueryRunnerFactoryConglomerate conglomerate;
private static final EmittingLogger log = new EmittingLogger(ThreadPoolTaskRunner.class);
@Inject
public ThreadPoolTaskRunner(
TaskToolboxFactory toolboxFactory
TaskToolboxFactory toolboxFactory,
QueryRunnerFactoryConglomerate conglomerate
)
{
this.toolboxFactory = Preconditions.checkNotNull(toolboxFactory, "toolboxFactory");
this.exec = MoreExecutors.listeningDecorator(Execs.singleThreaded("task-runner-%d"));
this.conglomerate = conglomerate;
}
@LifecycleStop
@ -150,10 +157,16 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
return getQueryRunnerImpl(query);
}
private <T> QueryRunner<T> getQueryRunnerImpl(Query<T> query)
private <T> QueryRunner<T> getQueryRunnerImpl(final Query<T> query)
{
return new UnionQueryRunner<>(
Iterables.transform(
query.getDataSource().getNames(), new Function<String, QueryRunner>()
{
@Override
public QueryRunner apply(String queryDataSource)
{
QueryRunner<T> queryRunner = null;
final String queryDataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
for (final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem : ImmutableList.copyOf(runningItems)) {
final Task task = taskRunnerWorkItem.getTask();
@ -171,8 +184,16 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
}
}
}
if (queryRunner != null) {
return queryRunner;
} else {
return new NoopQueryRunner();
}
}
}
), conglomerate.findFactory(query).getToolchest()
);
return queryRunner == null ? new NoopQueryRunner<T>() : queryRunner;
}
private static class ThreadPoolTaskRunnerWorkItem extends TaskRunnerWorkItem

View File

@ -34,7 +34,7 @@ public class ResourceManagementSchedulerConfig
private Period provisionPeriod = new Period("PT1M");
@JsonProperty
private Period terminatePeriod = new Period("PT1H");
private Period terminatePeriod = new Period("PT5M");
@JsonProperty
private DateTime originTime = new DateTime("2012-01-01T00:55:00.000Z");

View File

@ -27,7 +27,7 @@ import org.joda.time.Period;
public class SimpleResourceManagementConfig
{
@JsonProperty
private Period workerIdleTimeout = new Period("PT10m");
private Period workerIdleTimeout = new Period("PT90m");
@JsonProperty
private Period maxScalingDuration = new Period("PT15M");

View File

@ -412,9 +412,7 @@ public class OverlordResource
try {
final Optional<ByteSource> stream = taskLogStreamer.streamTaskLog(taskid, offset);
if (stream.isPresent()) {
try(InputStream istream = stream.get().openStream()) {
return Response.ok(istream).build();
}
return Response.ok(stream.get().openStream()).build();
} else {
return Response.status(Response.Status.NOT_FOUND)
.entity(

View File

@ -31,7 +31,7 @@ import com.metamx.common.lifecycle.LifecycleStop;
import com.metamx.common.logger.Logger;
import io.druid.curator.announcement.Announcer;
import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.server.initialization.ZkPathsConfig;
import io.druid.server.initialization.IndexerZkConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.zookeeper.CreateMode;
import org.joda.time.DateTime;
@ -63,7 +63,7 @@ public class WorkerCuratorCoordinator
@Inject
public WorkerCuratorCoordinator(
ObjectMapper jsonMapper,
ZkPathsConfig zkPaths,
IndexerZkConfig indexerZkConfig,
RemoteTaskRunnerConfig config,
CuratorFramework curatorFramework,
Worker worker
@ -76,9 +76,9 @@ public class WorkerCuratorCoordinator
this.announcer = new Announcer(curatorFramework, MoreExecutors.sameThreadExecutor());
this.baseAnnouncementsPath = getPath(Arrays.asList(zkPaths.getIndexerAnnouncementPath(), worker.getHost()));
this.baseTaskPath = getPath(Arrays.asList(zkPaths.getIndexerTaskPath(), worker.getHost()));
this.baseStatusPath = getPath(Arrays.asList(zkPaths.getIndexerStatusPath(), worker.getHost()));
this.baseAnnouncementsPath = getPath(Arrays.asList(indexerZkConfig.getAnnouncementsPath(), worker.getHost()));
this.baseTaskPath = getPath(Arrays.asList(indexerZkConfig.getTasksPath(), worker.getHost()));
this.baseStatusPath = getPath(Arrays.asList(indexerZkConfig.getStatus(), worker.getHost()));
}
@LifecycleStart

View File

@ -41,6 +41,7 @@ import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.IOException;
import java.io.InputStream;
/**
@ -167,10 +168,10 @@ public class WorkerResource
final Optional<ByteSource> stream = taskRunner.streamTaskLog(taskid, offset);
if (stream.isPresent()) {
try (InputStream logStream = stream.get().openStream()) {
return Response.ok(logStream).build();
try {
return Response.ok(stream.get().openStream()).build();
}
catch (Exception e) {
catch (IOException e) {
log.warn(e, "Failed to read log for task: %s", taskid);
return Response.serverError().build();
}

View File

@ -0,0 +1,102 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.initialization;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.curator.utils.ZKPaths;
/**
*
*/
public class IndexerZkConfig
{
@JsonCreator
public IndexerZkConfig(
@JacksonInject ZkPathsConfig zkPathsConfig,
@JsonProperty("base") String base,
@JsonProperty("announcementsPath") String announcementsPath,
@JsonProperty("tasksPath") String tasksPath,
@JsonProperty("status") String status,
@JsonProperty("leaderLatchPath") String leaderLatchPath
)
{
this.zkPathsConfig = zkPathsConfig;
this.base = base;
this.announcementsPath = announcementsPath;
this.tasksPath = tasksPath;
this.status = status;
this.leaderLatchPath = leaderLatchPath;
}
@JacksonInject
private final ZkPathsConfig zkPathsConfig;
@JsonProperty
private final String base;
@JsonProperty
private final String announcementsPath;
@JsonProperty
private final String tasksPath;
@JsonProperty
private final String status;
@JsonProperty
private final String leaderLatchPath;
private String defaultIndexerPath(final String subPath)
{
return getZkPathsConfig().defaultPath(ZKPaths.makePath(getBase(), subPath));
}
public String getBase()
{
return base == null ? "indexer" : base;
}
public String getAnnouncementsPath()
{
return announcementsPath == null ? defaultIndexerPath("announcements") : announcementsPath;
}
public String getTasksPath()
{
return tasksPath == null ? defaultIndexerPath("tasks") : tasksPath;
}
public String getStatus()
{
return status == null ? defaultIndexerPath("status") : status;
}
public String getLeaderLatchPath()
{
return leaderLatchPath == null ? defaultIndexerPath("leaderLatchPath") : leaderLatchPath;
}
public ZkPathsConfig getZkPathsConfig()
{
return zkPathsConfig;
}
}

View File

@ -20,6 +20,7 @@
package io.druid.indexing.common.task;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.metamx.common.Granularity;
import io.druid.data.input.impl.CSVParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
@ -57,7 +58,10 @@ public class IndexTaskTest
@Test
public void testDeterminePartitions() throws Exception
{
File tmpFile = File.createTempFile("druid", "index");
File tmpDir = Files.createTempDir();
tmpDir.deleteOnExit();
File tmpFile = File.createTempFile("druid", "index", tmpDir);
tmpFile.deleteOnExit();
PrintWriter writer = new PrintWriter(tmpFile);
@ -97,7 +101,7 @@ public class IndexTaskTest
),
new IndexTask.IndexIOConfig(
new LocalFirehoseFactory(
tmpFile.getParentFile(),
tmpDir,
"druid*",
null
)

View File

@ -43,6 +43,7 @@ import io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy;
import io.druid.indexing.worker.TaskAnnouncement;
import io.druid.indexing.worker.Worker;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
@ -57,6 +58,7 @@ import org.junit.Test;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
public class RemoteTaskRunnerTest
{
@ -66,6 +68,7 @@ public class RemoteTaskRunnerTest
private static final String announcementsPath = String.format("%s/indexer/announcements/worker", basePath);
private static final String tasksPath = String.format("%s/indexer/tasks/worker", basePath);
private static final String statusPath = String.format("%s/indexer/status/worker", basePath);
private static final int TIMEOUT_SECONDS = 5;
private TestingCluster testingCluster;
private CuratorFramework cf;
@ -282,7 +285,7 @@ public class RemoteTaskRunnerTest
cf.delete().forPath(joiner.join(statusPath, task.getId()));
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(status.getStatusCode(), TaskStatus.Status.FAILED);
}
@ -335,7 +338,7 @@ public class RemoteTaskRunnerTest
ListenableFuture<TaskStatus> future = remoteTaskRunner.run(task);
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(TaskStatus.Status.SUCCESS, status.getStatusCode());
}
@ -353,7 +356,7 @@ public class RemoteTaskRunnerTest
cf.delete().forPath(announcementsPath);
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(TaskStatus.Status.FAILED, status.getStatusCode());
}
@ -393,14 +396,14 @@ public class RemoteTaskRunnerTest
remoteTaskRunner = new RemoteTaskRunner(
jsonMapper,
config,
new ZkPathsConfig()
new IndexerZkConfig(new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
cf,
new SimplePathChildrenCacheFactory.Builder().build(),
null,

View File

@ -334,7 +334,7 @@ public class TaskLifecycleTest
),
new DefaultObjectMapper()
);
tr = new ThreadPoolTaskRunner(tb);
tr = new ThreadPoolTaskRunner(tb, null);
tq = new TaskQueue(tqc, ts, tr, tac, tl, emitter);
tq.start();
}

View File

@ -50,6 +50,7 @@ import io.druid.segment.loading.LocalDataSegmentPuller;
import io.druid.segment.loading.OmniSegmentLoader;
import io.druid.segment.loading.SegmentLoaderConfig;
import io.druid.segment.loading.StorageLocationConfig;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import junit.framework.Assert;
import org.apache.curator.framework.CuratorFramework;
@ -139,14 +140,15 @@ public class WorkerTaskMonitorTest
workerCuratorCoordinator = new WorkerCuratorCoordinator(
jsonMapper,
new IndexerZkConfig(
new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
new TestRemoteTaskRunnerConfig(),
cf,
worker
@ -180,7 +182,8 @@ public class WorkerTaskMonitorTest
}
)
), jsonMapper
)
),
null
),
new WorkerConfig().setCapacity(1)
);

View File

@ -25,6 +25,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.worker.Worker;
import io.druid.indexing.worker.WorkerCuratorCoordinator;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import junit.framework.Assert;
import org.apache.curator.framework.CuratorFramework;
@ -76,14 +77,14 @@ public class WorkerResourceTest
curatorCoordinator = new WorkerCuratorCoordinator(
jsonMapper,
new ZkPathsConfig()
new IndexerZkConfig(new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
new RemoteTaskRunnerConfig(),
cf,
worker

View File

@ -0,0 +1,214 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.initialization;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.name.Names;
import io.druid.curator.CuratorConfig;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
import io.druid.guice.JsonConfigurator;
import io.druid.initialization.Initialization;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
/**
*
*/
public class IndexerZkConfigTest
{
private static final String indexerPropertyString = "test.druid.zk.paths.indexer";
private static final String zkServiceConfigString = "test.druid.zk.paths";
private static final Collection<String> clobberableProperties = new HashSet<>();
private static final Module simpleZkConfigModule = new Module()
{
@Override
public void configure(Binder binder)
{
binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/test");
binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
// See IndexingServiceModuleHelper
JsonConfigProvider.bind(binder, indexerPropertyString, IndexerZkConfig.class);
JsonConfigProvider.bind(
binder, zkServiceConfigString,
ZkPathsConfig.class
);
}
};
@BeforeClass
public static void setup()
{
for (Field field : IndexerZkConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
clobberableProperties.add(String.format("%s.%s", indexerPropertyString, field.getName()));
}
}
for (Field field : ZkPathsConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
clobberableProperties.add(String.format("%s.%s", zkServiceConfigString, field.getName()));
}
}
}
private Properties propertyValues = new Properties();
private int assertions = 0;
@Before
public void setupTest()
{
for (String property : clobberableProperties) {
propertyValues.put(property, UUID.randomUUID().toString());
}
assertions = 0;
}
private void validateEntries(ZkPathsConfig zkPathsConfig)
throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
for (Field field : ZkPathsConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
String property = String.format("%s.%s", zkServiceConfigString, field.getName());
String getter = String.format(
"get%s%s",
field.getName().substring(0, 1).toUpperCase(),
field.getName().substring(1)
);
Method method = ZkPathsConfig.class.getDeclaredMethod(getter);
Assert.assertEquals(propertyValues.get(property), method.invoke(zkPathsConfig));
++assertions;
}
}
}
private void validateEntries(IndexerZkConfig indexerZkConfig)
throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
for (Field field : IndexerZkConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
String property = String.format("%s.%s", indexerPropertyString, field.getName());
String getter = String.format(
"get%s%s",
field.getName().substring(0, 1).toUpperCase(),
field.getName().substring(1)
);
Method method = IndexerZkConfig.class.getDeclaredMethod(getter);
Assert.assertEquals(propertyValues.get(property), method.invoke(indexerZkConfig));
++assertions;
}
}
}
@Test
public void testNullConfig(){
propertyValues.clear();
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(zkServiceConfigString, ZkPathsConfig.class);
zkPathsConfig.inject(propertyValues, configurator);
JsonConfigProvider<IndexerZkConfig> indexerZkConfig = JsonConfigProvider.of(
indexerPropertyString,
IndexerZkConfig.class
);
indexerZkConfig.inject(propertyValues, configurator);
Assert.assertEquals("/druid/indexer/leaderLatchPath", indexerZkConfig.get().get().getLeaderLatchPath());
}
@Test
public void testSimpleConfig() throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(zkServiceConfigString, ZkPathsConfig.class);
zkPathsConfig.inject(propertyValues, configurator);
JsonConfigProvider<IndexerZkConfig> indexerZkConfig = JsonConfigProvider.of(
indexerPropertyString,
IndexerZkConfig.class
);
indexerZkConfig.inject(propertyValues, configurator);
IndexerZkConfig zkConfig = indexerZkConfig.get().get();
ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get().get();
validateEntries(zkConfig);
validateEntries(zkPathsConfig1);
Assert.assertEquals(clobberableProperties.size(), assertions);
}
@Test
public void testExactConfig(){
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
propertyValues.setProperty(zkServiceConfigString + ".base", "/druid/metrics");
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(
zkServiceConfigString,
ZkPathsConfig.class
);
zkPathsConfig.inject(propertyValues, configurator);
ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get().get();
IndexerZkConfig indexerZkConfig = new IndexerZkConfig(zkPathsConfig1,null,null,null,null,null);
Assert.assertEquals("indexer", indexerZkConfig.getBase());
Assert.assertEquals("/druid/metrics/indexer/announcements", indexerZkConfig.getAnnouncementsPath());
}
}

View File

@ -0,0 +1,90 @@
Integration Testing
=========================
## Installing Docker and Running
Please refer to instructions at [https://github.com/druid-io/docker-druid/blob/master/docker-install.md](https://github.com/druid-io/docker-druid/blob/master/docker-install.md)
Instead of running
```
boot2docker init
```
run instead
```
boot2docker init -m 6000
```
Make sure that you have at least 6GB of memory available before you run the tests.
Set the docker ip via:
```
export DOCKER_IP=$(boot2docker ip 2>/dev/null)
```
Verify that docker is running by issuing the following command:
```
docker info
```
Running Integration tests
=========================
## Running tests using mvn
To run all the tests using mvn run the following command -
'''''
mvn verify -P integration-tests
'''''
To run only a single test using mvn run following command -
'''''
mvn verify -P integration-tests -Dit.test=<test_name>
'''''
Writing a New Test
===============
## What should we cover in integration tests
For every end-user functionality provided by druid we should have an integration-test verifying the correctness.
## Rules to be followed while writing a new integration test
### Every Integration Test must follow these rules
1) Name of the test must start with a prefix "IT"
2) A test should be independent of other tests
3) Tests are to be written in TestNG style ([http://testng.org/doc/documentation-main.html#methods](http://testng.org/doc/documentation-main.html#methods))
4) If a test loads some data it is the responsibility of the test to clean up the data from the cluster
### How to use Guice Dependency Injection in a test
A test can access different helper and utility classes provided by test-framework in order to access Coordinator,Broker etc..
To mark a test be able to use Guice Dependency Injection -
Annotate the test class with the below annotation
'''''''
@Guice(moduleFactory = DruidTestModuleFactory.class)
'''''''
This will tell the test framework that the test class needs to be constructed using guice.
### Helper Classes provided
1) IntegrationTestingConfig - configuration of the test
2) CoordinatorResourceTestClient - httpclient for coordinator endpoints
3) OverlordResourceTestClient - httpclient for indexer endpoints
4) QueryResourceTestClient - httpclient for broker endpoints
### Static Utility classes
1) RetryUtil - provides methods to retry an operation until it succeeds for configurable no. of times
2) FromFileTestQueryHelper - reads queries with expected results from file and executes them and verifies the results using ResultVerifier
Refer ITIndexerTest as an example on how to use dependency Injection
TODOS
=======================
1) Remove the patch for TestNG after resolution of Surefire-622

View File

@ -0,0 +1,71 @@
FROM ubuntu:14.04
# Add Java 7 repository
RUN apt-get update
RUN apt-get install -y software-properties-common
RUN apt-add-repository -y ppa:webupd8team/java
RUN apt-get update
# Oracle Java 7
RUN echo oracle-java-7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get install -y oracle-java7-installer
RUN apt-get install -y oracle-java7-set-default
# MySQL (Metadata store)
RUN apt-get install -y mysql-server
# Supervisor
RUN apt-get install -y supervisor
# Maven
RUN wget -q -O - http://www.us.apache.org/dist/maven/maven-3/3.2.5/binaries/apache-maven-3.2.5-bin.tar.gz | tar -xzf - -C /usr/local
RUN ln -s /usr/local/apache-maven-3.2.1 /usr/local/apache-maven
RUN ln -s /usr/local/apache-maven/bin/mvn /usr/local/bin/mvn
# Zookeeper
RUN wget -q -O - http://www.us.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xzf - -C /usr/local
RUN cp /usr/local/zookeeper-3.4.6/conf/zoo_sample.cfg /usr/local/zookeeper-3.4.6/conf/zoo.cfg
RUN ln -s /usr/local/zookeeper-3.4.6 /usr/local/zookeeper
# git
RUN apt-get install -y git
# Druid system user
RUN adduser --system --group --no-create-home druid
RUN mkdir -p /var/lib/druid
RUN chown druid:druid /var/lib/druid
# Add druid jars
ADD lib/* /usr/local/druid/lib/
WORKDIR /
# Setup metadata store
RUN /etc/init.d/mysql start && echo "GRANT ALL ON druid.* TO 'druid'@'%' IDENTIFIED BY 'diurd'; CREATE database druid;" | mysql -u root && /etc/init.d/mysql stop
# Add sample data
RUN /etc/init.d/mysql start && java -Ddruid.metadata.storage.type=mysql -cp "/usr/local/druid/lib/*" io.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd && /etc/init.d/mysql stop
ADD sample-data.sql sample-data.sql
RUN /etc/init.d/mysql start && cat sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop
# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Clean up
RUN apt-get clean && rm -rf /tmp/* /var/tmp/*
# Expose ports:
# - 8081: HTTP (coordinator)
# - 8082: HTTP (broker)
# - 8083: HTTP (historical)
# - 3306: MySQL
# - 2181 2888 3888: ZooKeeper
# - 8100 8101 8102 8103 8104 : peon ports
EXPOSE 8081
EXPOSE 8082
EXPOSE 8083
EXPOSE 3306
EXPOSE 2181 2888 3888
EXPOSE 8100 8101 8102 8103 8104
WORKDIR /var/lib/druid
ENTRYPOINT export HOST_IP="$(resolveip -s $HOSTNAME)" && exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf

View File

@ -0,0 +1,29 @@
[program:druid-broker]
command=java
-server
-Xmx1g
-Xms1g
-XX:NewSize=500m
-XX:MaxNewSize=500m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.processing.buffer.sizeBytes=75000000
-Ddruid.server.http.numThreads=100
-Ddruid.processing.numThreads=1
-Ddruid.broker.http.numConnections=30
-Ddruid.broker.http.readTimeout=PT5M
-Ddruid.broker.cache.useCache=true
-Ddruid.broker.cache.populateCache=true
-Ddruid.cache.type=local
-Ddruid.cache.sizeInBytes=40000000
-cp /usr/local/druid/lib/*
io.druid.cli.Main server broker
redirect_stderr=true
autorestart=false
priority=100
stdout_logfile=/shared/logs/broker.log

View File

@ -0,0 +1,23 @@
[program:druid-coordinator]
command=java
-server
-Xmx128m
-Xms128m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.metadata.storage.type=mysql
-Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://druid-metadata-storage/druid
-Ddruid.metadata.storage.connector.user=druid
-Ddruid.metadata.storage.connector.password=diurd
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.coordinator.startDelay=PT5S
-cp /usr/local/druid/lib/*
io.druid.cli.Main server coordinator
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/coordinator.log

View File

@ -0,0 +1,27 @@
[program:druid-historical]
command=java
-server
-Xmx1500m
-Xms1500m
-XX:NewSize=750m
-XX:MaxNewSize=750m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ
-Ddruid.s3.secretKey=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
-Ddruid.processing.buffer.sizeBytes=75000000
-Ddruid.processing.numThreads=3
-Ddruid.server.http.numThreads=100
-Ddruid.segmentCache.locations="[{\"path\":\"/shared/druid/indexCache\",\"maxSize\":5000000000}]"
-Ddruid.server.maxSize=5000000000
-cp /usr/local/druid/lib/*
io.druid.cli.Main server historical
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/historical.log

View File

@ -0,0 +1,6 @@
[program:mysql]
command=/usr/bin/pidproxy /var/run/mysqld/mysqld.pid /usr/bin/mysqld_safe
--bind-address=0.0.0.0
user=mysql
priority=0
stdout_logfile=/shared/logs/mysql.log

View File

@ -0,0 +1,29 @@
[program:druid-middlemanager]
command=java
-server
-Xmx64m
-Xms64m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.indexer.logs.directory=/shared/tasklogs
-Ddruid.storage.storageDirectory=/shared/storage
-Ddruid.indexer.runner.javaOpts=-server -Xmx256m -Xms256m -XX:NewSize=128m -XX:MaxNewSize=128m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
-Ddruid.indexer.fork.property.druid.processing.buffer.sizeBytes=75000000
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.server.http.numThreads=100
-Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ
-Ddruid.s3.secretKey=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
-Ddruid.worker.ip=%(ENV_HOST_IP)s
-Ddruid.selectors.indexing.serviceName=druid:overlord
-Ddruid.indexer.task.chathandler.type=announce
-cp /usr/local/druid/lib/*
io.druid.cli.Main server middleManager
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/middlemanager.log

View File

@ -0,0 +1,25 @@
[program:druid-overlord]
command=java
-server
-Xmx128m
-Xms128m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.metadata.storage.type=mysql
-Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://druid-metadata-storage/druid
-Ddruid.metadata.storage.connector.user=druid
-Ddruid.metadata.storage.connector.password=diurd
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.indexer.storage.type=metadata
-Ddruid.indexer.logs.directory=/shared/tasklogs
-Ddruid.indexer.runner.type=remote
-cp /usr/local/druid/lib/*
io.druid.cli.Main server overlord
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/overlord.log

View File

@ -0,0 +1,20 @@
[program:druid-router]
command=java
-server
-Xmx1g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.computation.buffer.size=75000000
-Ddruid.server.http.numThreads=100
-Ddruid.processing.numThreads=1
-cp /usr/local/druid/lib/*
io.druid.cli.Main server router
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/router.log

View File

@ -0,0 +1,5 @@
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9','twitterstream','2013-05-13T01:08:18.192Z','2013-01-01T00:00:00.000Z','2013-01-02T00:00:00.000Z',0,'2013-01-02T04:13:41.980Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z\",\"version\":\"2013-01-02T04:13:41.980Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/2013-01-02T04:13:41.980Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":445235220,\"identifier\":\"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9','twitterstream','2013-05-13T00:03:28.640Z','2013-01-02T00:00:00.000Z','2013-01-03T00:00:00.000Z',0,'2013-01-03T03:44:58.791Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z\",\"version\":\"2013-01-03T03:44:58.791Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z/2013-01-03T03:44:58.791Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":435325540,\"identifier\":\"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');

View File

@ -0,0 +1,6 @@
[supervisord]
nodaemon=true
[include]
files = /usr/lib/druid/conf/*.conf

View File

@ -0,0 +1,5 @@
[program:zookeeper]
command=/usr/local/zookeeper/bin/zkServer.sh start-foreground
user=daemon
priority=0
stdout_logfile=/shared/logs/zookeeper.log

151
integration-tests/pom.xml Normal file
View File

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Druid - a distributed column store.
~ Copyright (C) 2012, 2013 Metamarkets Group Inc.
~
~ This program is free software; you can redistribute it and/or
~ modify it under the terms of the GNU General Public License
~ as published by the Free Software Foundation; either version 2
~ of the License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU General Public License for more details.
~
~ You should have received a copy of the GNU General Public License
~ along with this program; if not, write to the Free Software
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-integration-tests</artifactId>
<name>druid-integration-tests</name>
<description>druid-integration-tests</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-histogram</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>mysql-metadata-storage</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-services</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Tests -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/IT*.java</exclude>
</excludes>
<systemPropertyVariables>
<user.timezone>UTC</user.timezone>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>integration-tests</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<id>build-and-start-druid-cluster</id>
<goals>
<goal>exec</goal>
</goals>
<phase>pre-integration-test</phase>
<configuration>
<executable>${project.basedir}/run_cluster.sh</executable>
</configuration>
</execution>
<execution>
<id>stop-druid-cluster</id>
<goals>
<goal>exec</goal>
</goals>
<phase>post-integration-test</phase>
<configuration>
<executable>${project.basedir}/stop_cluster.sh</executable>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<executions>
<execution>
<id>integration-tests</id>
<phase>integration-test</phase>
<goals>
<goal>integration-test</goal>
<goal>verify</goal>
</goals>
</execution>
</executions>
<configuration>
<argLine>-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dtestrunfactory=org.testng.DruidTestRunnerFactory
-Ddruid.test.config.dockerIp=${env.DOCKER_IP} -Ddruid.zk.service.host=${env.DOCKER_IP}
</argLine>
<suiteXmlFiles>
<suiteXmlFile>src/test/resources/testng.xml</suiteXmlFile>
</suiteXmlFiles>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,49 @@
# cleanup
for node in druid-historical druid-coordinator druid-overlord druid-router druid-broker druid-middlemanager druid-zookeeper druid-metadata-storage;
do
docker stop $node
docker rm $node
done
# environment variables
DIR=$(cd $(dirname $0) && pwd)
DOCKERDIR=$DIR/docker
SHARED_DIR=${HOME}/shared
SUPERVISORDIR=/usr/lib/druid/conf
RESOURCEDIR=$DIR/src/test/resources
# Make directories if they dont exist
mkdir -p $SHARED_DIR/logs
mkdir -p $SHARED_DIR/tasklogs
# install druid jars
rm -rf $SHARED_DIR/docker
cp -R docker $SHARED_DIR/docker
mvn dependency:copy-dependencies -DoutputDirectory=$SHARED_DIR/docker/lib
# Build Druid Cluster Image
docker build -t druid/cluster $SHARED_DIR/docker
# Start zookeeper
docker run -d --name druid-zookeeper -p 2181:2181 -v $SHARED_DIR:/shared -v $DOCKERDIR/zookeeper.conf:$SUPERVISORDIR/zookeeper.conf druid/cluster
# Start MYSQL
docker run -d --name druid-metadata-storage -v $SHARED_DIR:/shared -v $DOCKERDIR/metadata-storage.conf:$SUPERVISORDIR/metadata-storage.conf druid/cluster
# Start Overlord
docker run -d --name druid-overlord -p 8090:8090 -v $SHARED_DIR:/shared -v $DOCKERDIR/overlord.conf:$SUPERVISORDIR/overlord.conf --link druid-metadata-storage:druid-metadata-storage --link druid-zookeeper:druid-zookeeper druid/cluster
# Start Coordinator
docker run -d --name druid-coordinator -p 8081:8081 -v $SHARED_DIR:/shared -v $DOCKERDIR/coordinator.conf:$SUPERVISORDIR/coordinator.conf --link druid-overlord:druid-overlord --link druid-metadata-storage:druid-metadata-storage --link druid-zookeeper:druid-zookeeper druid/cluster
# Start Historical
docker run -d --name druid-historical -v $SHARED_DIR:/shared -v $DOCKERDIR/historical.conf:$SUPERVISORDIR/historical.conf --link druid-zookeeper:druid-zookeeper druid/cluster
# Start Middlemanger
docker run -d --name druid-middlemanager -p 8100:8100 -p 8101:8101 -p 8102:8102 -p 8103:8103 -p 8104:8104 -p 8105:8105 -v $RESOURCEDIR:/resources -v $SHARED_DIR:/shared -v $DOCKERDIR/middlemanager.conf:$SUPERVISORDIR/middlemanager.conf --link druid-zookeeper:druid-zookeeper --link druid-overlord:druid-overlord druid/cluster
# Start Broker
docker run -d --name druid-broker -v $SHARED_DIR:/shared -v $DOCKERDIR/broker.conf:$SUPERVISORDIR/broker.conf --link druid-zookeeper:druid-zookeeper --link druid-middlemanager:druid-middlemanager --link druid-historical:druid-historical druid/cluster
# Start Router
docker run -d --name druid-router -p 8888:8888 -v $SHARED_DIR:/shared -v $DOCKERDIR/router.conf:$SUPERVISORDIR/router.conf --link druid-zookeeper:druid-zookeeper --link druid-coordinator:druid-coordinator --link druid-broker:druid-broker druid/cluster

View File

@ -0,0 +1,67 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import org.apache.commons.io.IOUtils;
import javax.validation.constraints.NotNull;
import java.util.List;
public class DockerConfigProvider implements IntegrationTestingConfigProvider
{
@JsonProperty
@NotNull
private String dockerIp;
@Override
public IntegrationTestingConfig get()
{
return new IntegrationTestingConfig()
{
@Override
public String getCoordinatorHost()
{
return dockerIp+":8081";
}
@Override
public String getIndexerHost()
{
return dockerIp+":8090";
}
@Override
public String getRouterHost()
{
return dockerIp+ ":8888";
}
@Override
public String getMiddleManagerHost()
{
return dockerIp;
}
};
}
}

View File

@ -0,0 +1,33 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
/**
*/
public interface IntegrationTestingConfig
{
public String getCoordinatorHost();
public String getIndexerHost();
public String getRouterHost();
public String getMiddleManagerHost();
}

View File

@ -0,0 +1,32 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.google.inject.Provider;
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = DockerConfigProvider.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "docker", value = DockerConfigProvider.class)
})
public interface IntegrationTestingConfigProvider extends Provider<IntegrationTestingConfig>
{
}

View File

@ -0,0 +1,146 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.RequestBuilder;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.joda.time.Interval;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Map;
public class CoordinatorResourceTestClient
{
private final static Logger LOG = new Logger(CoordinatorResourceTestClient.class);
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String coordinator;
private final StatusResponseHandler responseHandler;
@Inject
CoordinatorResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient, IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.coordinator = config.getCoordinatorHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getCoordinatorURL()
{
return String.format(
"http://%s/druid/coordinator/v1/",
coordinator
);
}
private Map<String, Integer> getLoadStatus()
{
Map<String, Integer> status = null;
try {
StatusResponseHolder response = makeRequest(HttpMethod.GET, getCoordinatorURL() + "loadstatus?simple");
status = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Integer>>()
{
}
);
}
catch (Exception e) {
Throwables.propagate(e);
}
return status;
}
public boolean areSegmentsLoaded(String dataSource)
{
final Map<String, Integer> status = getLoadStatus();
return (status.containsKey(dataSource) && status.get(dataSource) == 0);
}
public void unloadSegmentsForDataSource(String dataSource, Interval interval)
{
killDataSource(dataSource, false, interval);
}
public void deleteSegmentsDataSource(String dataSource, Interval interval)
{
killDataSource(dataSource, true, interval);
}
private void killDataSource(String dataSource, boolean kill, Interval interval)
{
try {
makeRequest(
HttpMethod.DELETE,
String.format(
"%sdatasources/%s?kill=%s&interval=%s",
getCoordinatorURL(),
dataSource, kill, URLEncoder.encode(interval.toString(), "UTF-8")
)
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
private StatusResponseHolder makeRequest(HttpMethod method, String url)
{
try {
StatusResponseHolder response = new RequestBuilder(
this.httpClient,
method, new URL(url)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while making request to url[%s] status[%s] content[%s]",
url,
response.getStatus(),
response.getContent()
);
}
return response;
}
catch (Exception e) {
LOG.error(e, "Exception while sending request");
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,135 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.client.util.Charsets;
import com.google.common.base.Throwables;
import com.metamx.common.ISE;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import javax.ws.rs.core.MediaType;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
public class EventReceiverFirehoseTestClient
{
private final String host;
private final StatusResponseHandler responseHandler;
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String chatID;
public EventReceiverFirehoseTestClient(String host, String chatID, ObjectMapper jsonMapper, HttpClient httpClient)
{
this.host = host;
this.jsonMapper = jsonMapper;
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
this.httpClient = httpClient;
this.chatID = chatID;
}
private String getURL()
{
return String.format(
"http://%s/druid/worker/v1/chat/%s/push-events/",
host,
chatID
);
}
/**
* post events from the collection and return the count of events accepted
*
* @param events Collection of events to be posted
*
* @return
*/
public int postEvents(Collection<Map<String, Object>> events)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getURL()))
.setContent(
MediaType.APPLICATION_JSON,
this.jsonMapper.writeValueAsBytes(events)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while posting events to url[%s] status[%s] content[%s]",
getURL(),
response.getStatus(),
response.getContent()
);
}
Map<String, Integer> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Integer>>()
{
}
);
return responseData.get("eventCount");
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public int postEventsFromFile(String file)
{
try {
BufferedReader reader = new BufferedReader(
new InputStreamReader(
EventReceiverFirehoseTestClient.class.getResourceAsStream(
file
)
)
);
String s;
Collection<Map<String, Object>> events = new ArrayList<Map<String, Object>>();
while ((s = reader.readLine()) != null) {
events.add(
(Map<String, Object>) this.jsonMapper.readValue(
s, new TypeReference<Map<String, Object>>()
{
}
)
);
}
int eventsPosted = postEvents(events);
if (eventsPosted != events.size()) {
throw new ISE("All events not posted, expected : %d actual : %d", events.size(), eventsPosted);
}
return eventsPosted;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,213 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.task.Task;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.utils.RetryUtil;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
public class OverlordResourceTestClient
{
private final static Logger LOG = new Logger(OverlordResourceTestClient.class);
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String indexer;
private final StatusResponseHandler responseHandler;
@Inject
OverlordResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient, IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.indexer = config.getIndexerHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getIndexerURL()
{
return String.format(
"http://%s/druid/indexer/v1/",
indexer
);
}
public String submitTask(Task task)
{
try {
return submitTask(this.jsonMapper.writeValueAsString(task));
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public String submitTask(String task)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getIndexerURL() + "task"))
.setContent(
"application/json",
task.getBytes()
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while submitting task to indexer response [%s %s]",
response.getStatus(),
response.getContent()
);
}
Map<String, String> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, String>>()
{
}
);
String taskID = responseData.get("task");
LOG.info("Submitted task with TaskID[%s]", taskID);
return taskID;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public TaskStatus.Status getTaskStatus(String taskID)
{
try {
StatusResponseHolder response = makeRequest(
String.format(
"%stask/%s/status",
getIndexerURL(),
URLEncoder.encode(taskID, "UTF-8")
)
);
LOG.info("Index status response" + response.getContent());
Map<String, Object> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Object>>()
{
}
);
//TODO: figure out a better way to parse the response...
String status = (String) ((Map) responseData.get("status")).get("status");
return TaskStatus.Status.valueOf(status);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public List<TaskResponseObject> getRunningTasks()
{
return getTasks("runningTasks");
}
public List<TaskResponseObject> getWaitingTasks()
{
return getTasks("waitingTasks");
}
public List<TaskResponseObject> getPendingTasks()
{
return getTasks("pendingTasks");
}
private List<TaskResponseObject> getTasks(String identifier)
{
try {
StatusResponseHolder response = makeRequest(
String.format("%s%s", getIndexerURL(), identifier)
);
LOG.info("Tasks %s response %s", identifier, response.getContent());
return jsonMapper.readValue(
response.getContent(), new TypeReference<List<TaskResponseObject>>()
{
}
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public void waitUntilTaskCompletes(final String taskID)
{
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
TaskStatus.Status status = getTaskStatus(taskID);
if (status == TaskStatus.Status.FAILED) {
throw new ISE("Indexer task FAILED");
}
return status == TaskStatus.Status.SUCCESS;
}
},
true,
60000,
10,
"Index Task to complete"
);
}
private StatusResponseHolder makeRequest(String url)
{
try {
StatusResponseHolder response = this.httpClient
.get(new URL(url))
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE("Error while making request to indexer [%s %s]", response.getStatus(), response.getContent());
}
return response;
}
catch (Exception e) {
LOG.error(e, "Exception while sending request");
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,98 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.query.Query;
import io.druid.testing.IntegrationTestingConfig;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import java.net.URL;
import java.util.List;
import java.util.Map;
public class QueryResourceTestClient
{
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String router;
private final StatusResponseHandler responseHandler;
@Inject
QueryResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient,
IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.router = config.getRouterHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getBrokerURL()
{
return String.format(
"http://%s/druid/v2/",
router
);
}
public List<Map<String, Object>> query(Query query)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getBrokerURL()))
.setContent(
"application/json",
jsonMapper.writeValueAsBytes(query)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while querying[%s] status[%s] content[%s]",
getBrokerURL(),
response.getStatus(),
response.getContent()
);
}
return jsonMapper.readValue(
response.getContent(), new TypeReference<List<Map<String, Object>>>()
{
}
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,68 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexing.common.TaskStatus;
import org.joda.time.DateTime;
public class TaskResponseObject
{
private final String id;
private final DateTime createdTime;
private final DateTime queueInsertionTime;
private final TaskStatus status;
@JsonCreator
private TaskResponseObject(
@JsonProperty("id") String id,
@JsonProperty("createdTime") DateTime createdTime,
@JsonProperty("queueInsertionTime") DateTime queueInsertionTime,
@JsonProperty("status") TaskStatus status
)
{
this.id = id;
this.createdTime = createdTime;
this.queueInsertionTime = queueInsertionTime;
this.status = status;
}
public String getId()
{
return id;
}
public DateTime getCreatedTime()
{
return createdTime;
}
public DateTime getQueueInsertionTime()
{
return queueInsertionTime;
}
public TaskStatus getStatus()
{
return status;
}
}

View File

@ -0,0 +1,54 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.guice;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.inject.Binder;
import com.google.inject.Module;
import com.google.inject.Provides;
import com.metamx.emitter.core.LoggingEmitter;
import com.metamx.emitter.core.LoggingEmitterConfig;
import com.metamx.emitter.service.ServiceEmitter;
import io.druid.guice.ConfigProvider;
import io.druid.guice.JsonConfigProvider;
import io.druid.guice.LazySingleton;
import io.druid.guice.ManageLifecycle;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.IntegrationTestingConfigProvider;
/**
*/
public class DruidTestModule implements Module
{
@Override
public void configure(Binder binder)
{
binder.bind(IntegrationTestingConfig.class).toProvider(IntegrationTestingConfigProvider.class).in(ManageLifecycle.class);
JsonConfigProvider.bind(binder, "druid.test.config", IntegrationTestingConfigProvider.class);
}
@Provides
@LazySingleton
public ServiceEmitter getServiceEmitter(Supplier<LoggingEmitterConfig> config, ObjectMapper jsonMapper)
{
return new ServiceEmitter("", "", new LoggingEmitter(config.get(), jsonMapper));
}
}

View File

@ -0,0 +1,63 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.guice;
import com.google.common.collect.ImmutableList;
import com.google.inject.Injector;
import com.google.inject.Module;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.IndexingServiceFirehoseModule;
import io.druid.initialization.Initialization;
import org.testng.IModuleFactory;
import org.testng.ITestContext;
import java.util.Collections;
import java.util.List;
public class DruidTestModuleFactory implements IModuleFactory
{
private static final Module module = new DruidTestModule();
private static final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
getModules()
);
public static Injector getInjector()
{
return injector;
}
private static List<? extends Module> getModules()
{
return ImmutableList.of(
new DruidTestModule(),
new IndexingServiceFirehoseModule()
);
}
@Override
public Module createModule(ITestContext context, Class<?> testClass)
{
context.addGuiceModule(DruidTestModule.class, module);
context.addInjector(Collections.singletonList(module), injector);
return module;
}
}

View File

@ -0,0 +1,79 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.testing.clients.QueryResourceTestClient;
import java.util.List;
import java.util.Map;
public class FromFileTestQueryHelper
{
public static Logger LOG = new Logger(FromFileTestQueryHelper.class);
private final QueryResourceTestClient queryClient;
private final ObjectMapper jsonMapper;
@Inject
FromFileTestQueryHelper(ObjectMapper jsonMapper, QueryResourceTestClient queryClient)
{
this.jsonMapper = jsonMapper;
this.queryClient = queryClient;
}
public void testQueriesFromFile(String filePath, int timesToRun) throws Exception
{
LOG.info("Starting query tests for [%s]", filePath);
List<QueryWithResults> queries =
jsonMapper.readValue(
FromFileTestQueryHelper.class.getResourceAsStream(filePath),
new TypeReference<List<QueryWithResults>>()
{
}
);
for (int i = 0; i < timesToRun; i++) {
LOG.info("Starting Iteration " + i);
boolean failed = false;
for (QueryWithResults queryWithResult : queries) {
LOG.info("Running Query " + queryWithResult.getQuery().getType());
List<Map<String, Object>> result = queryClient.query(queryWithResult.getQuery());
if (!QueryResultVerifier.compareResults(result, queryWithResult.getExpectedResults())) {
LOG.error(
"Failed while executing %s actualResults : %s",
queryWithResult,
jsonMapper.writeValueAsString(result)
);
failed = true;
} else {
LOG.info("Results Verified for Query " + queryWithResult.getQuery().getType());
}
}
if (failed) {
throw new ISE("one or more twitter queries failed");
}
}
}
}

View File

@ -0,0 +1,49 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import java.util.Iterator;
import java.util.Map;
public class QueryResultVerifier
{
public static boolean compareResults(
Iterable<Map<String, Object>> actual,
Iterable<Map<String, Object>> expected
)
{
Iterator<Map<String, Object>> actualIter = actual.iterator();
Iterator<Map<String, Object>> expectedIter = expected.iterator();
while (actualIter.hasNext() && expectedIter.hasNext()) {
Map<String, Object> actualRes = actualIter.next();
Map<String, Object> expRes = expectedIter.next();
if (!actualRes.equals(expRes)) {
return false;
}
}
if (actualIter.hasNext() || expectedIter.hasNext()) {
return false;
}
return true;
}
}

View File

@ -0,0 +1,64 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.query.Query;
import java.util.List;
import java.util.Map;
public class QueryWithResults
{
private final Query query;
private final List<Map<String, Object>> expectedResults;
@JsonCreator
public QueryWithResults(
@JsonProperty("query") Query query,
@JsonProperty("expectedResults") List<Map<String, Object>> expectedResults
)
{
this.query = query;
this.expectedResults = expectedResults;
}
@JsonProperty
public Query getQuery()
{
return query;
}
@JsonProperty
public List<Map<String, Object>> getExpectedResults()
{
return expectedResults;
}
@Override
public String toString()
{
return "QueryWithResults{" +
"query=" + query +
", expectedResults=" + expectedResults +
'}';
}
}

View File

@ -0,0 +1,76 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.google.common.base.Throwables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
public class RetryUtil
{
private static final Logger LOG = new Logger(RetryUtil.class);
public static int DEFAULT_RETRY_COUNT = 10;
public static long DEFAULT_RETRY_SLEEP = TimeUnit.SECONDS.toMillis(30);
public static void retryUntilTrue(Callable<Boolean> callable, String task)
{
retryUntil(callable, true, DEFAULT_RETRY_SLEEP, DEFAULT_RETRY_COUNT, task);
}
public static void retryUntilFalse(Callable<Boolean> callable, String task)
{
retryUntil(callable, false, DEFAULT_RETRY_SLEEP, DEFAULT_RETRY_COUNT, task);
}
public static void retryUntil(
Callable<Boolean> callable,
boolean expectedValue,
long delayInMillis,
int retryCount,
String taskMessage
)
{
try {
int currentTry = 0;
while (callable.call() != expectedValue) {
if (currentTry > retryCount) {
throw new ISE("Max number of retries[%d] exceeded for Task[%s]. Failing.", retryCount, taskMessage);
}
LOG.info(
"Attempt[%d]: Task %s still not complete. Next retry in %d ms",
currentTry, taskMessage, delayInMillis
);
Thread.sleep(delayInMillis);
currentTry++;
}
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,64 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.metamx.common.logger.Logger;
import io.druid.client.selector.Server;
import io.druid.curator.discovery.ServerDiscoverySelector;
import java.util.concurrent.Callable;
public class ServerDiscoveryUtil
{
private static final Logger LOG = new Logger(ServerDiscoveryUtil.class);
public static boolean isInstanceReady(ServerDiscoverySelector serviceProvider)
{
try {
Server instance = serviceProvider.pick();
if (instance == null) {
LOG.warn("Unable to find a host");
return false;
}
}
catch (Exception e) {
LOG.error(e, "Caught exception waiting for host");
return false;
}
return true;
}
public static void waitUntilInstanceReady(final ServerDiscoverySelector serviceProvider, String instanceType)
{
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return isInstanceReady(serviceProvider);
}
},
String.format("Instance %s to get ready", instanceType)
);
}
}

View File

@ -0,0 +1,150 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.testng;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.api.client.util.Charsets;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.testng.internal.IConfiguration;
import org.testng.internal.annotations.IAnnotationFinder;
import org.testng.xml.XmlTest;
import java.net.URL;
import java.util.List;
import java.util.concurrent.Callable;
public class DruidTestRunnerFactory implements ITestRunnerFactory
{
private static final Logger LOG = new Logger(DruidTestRunnerFactory.class);
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest test, List<IInvokedMethodListener> listeners
)
{
IConfiguration configuration = TestNG.getDefault().getConfiguration();
String outputDirectory = suite.getOutputDirectory();
IAnnotationFinder annotationFinder = configuration.getAnnotationFinder();
Boolean skipFailedInvocationCounts = suite.getXmlSuite().skipFailedInvocationCounts();
return new DruidTestRunner(
configuration,
suite,
test,
outputDirectory,
annotationFinder,
skipFailedInvocationCounts,
listeners
);
}
private static class DruidTestRunner extends TestRunner
{
protected DruidTestRunner(
IConfiguration configuration,
ISuite suite,
XmlTest test,
String outputDirectory,
IAnnotationFinder finder,
boolean skipFailedInvocationCounts,
List<IInvokedMethodListener> invokedMethodListeners
)
{
super(configuration, suite, test, outputDirectory, finder, skipFailedInvocationCounts, invokedMethodListeners);
}
@Override
public void run()
{
Injector injector = DruidTestModuleFactory.getInjector();
IntegrationTestingConfig config = injector.getInstance(IntegrationTestingConfig.class);
HttpClient client = injector.getInstance(Key.get(HttpClient.class, Global.class));
;
waitUntilInstanceReady(client, config.getCoordinatorHost());
waitUntilInstanceReady(client, config.getIndexerHost());
waitUntilInstanceReady(client, config.getRouterHost());
Lifecycle lifecycle = injector.getInstance(Lifecycle.class);
try {
lifecycle.start();
runTests();
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
lifecycle.stop();
}
}
private void runTests()
{
super.run();
}
public void waitUntilInstanceReady(final HttpClient client, final String host)
{
final StatusResponseHandler handler = new StatusResponseHandler(Charsets.UTF_8);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
try {
StatusResponseHolder response = client.get(
new URL(
String.format(
"http://%s/status",
host
)
)
)
.go(handler)
.get();
System.out.println(response.getStatus() + response.getContent());
if (response.getStatus().equals(HttpResponseStatus.OK)) {
return true;
} else {
return false;
}
}
catch (Throwable e) {
e.printStackTrace();
return false;
}
}
}, "Waiting for instance to be ready: [" + host + "]"
);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,336 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.testng.remote;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.ParameterException;
import org.testng.CommandLineArgs;
import org.testng.IInvokedMethodListener;
import org.testng.ISuite;
import org.testng.ISuiteListener;
import org.testng.ITestRunnerFactory;
import org.testng.TestNG;
import org.testng.TestNGException;
import org.testng.TestRunner;
import org.testng.collections.Lists;
import org.testng.internal.ClassHelper;
import org.testng.remote.strprotocol.GenericMessage;
import org.testng.remote.strprotocol.IMessageSender;
import org.testng.remote.strprotocol.MessageHelper;
import org.testng.remote.strprotocol.MessageHub;
import org.testng.remote.strprotocol.RemoteTestListener;
import org.testng.remote.strprotocol.SerializedMessageSender;
import org.testng.remote.strprotocol.StringMessageSender;
import org.testng.remote.strprotocol.SuiteMessage;
import org.testng.reporters.JUnitXMLReporter;
import org.testng.reporters.TestHTMLReporter;
import org.testng.xml.XmlSuite;
import org.testng.xml.XmlTest;
import java.util.Arrays;
import java.util.List;
import static org.testng.internal.Utils.defaultIfStringEmpty;
/**
* Class copied from TestNG library ver 6.8.7 to apply a workaround for http://jira.codehaus.org/browse/SUREFIRE-622
* To Locate the PATCHED AREA search for keyword "PATCH" in this class file
* <p/>
* Extension of TestNG registering a remote TestListener.
*
* @author Cedric Beust <cedric@beust.com>
*/
public class RemoteTestNG extends TestNG
{
// The following constants are referenced by the Eclipse plug-in, make sure you
// modify the plug-in as well if you change any of them.
public static final String DEBUG_PORT = "12345";
public static final String DEBUG_SUITE_FILE = "testng-customsuite.xml";
public static final String DEBUG_SUITE_DIRECTORY = System.getProperty("java.io.tmpdir");
public static final String PROPERTY_DEBUG = "testng.eclipse.debug";
public static final String PROPERTY_VERBOSE = "testng.eclipse.verbose";
private static final String LOCALHOST = "localhost";
// End of Eclipse constants.
/**
* Port used for the serialized protocol
*/
private static Integer m_serPort = null;
private static boolean m_debug;
private static boolean m_dontExit;
private static boolean m_ack;
private ITestRunnerFactory m_customTestRunnerFactory;
private String m_host;
/**
* Port used for the string protocol
*/
private Integer m_port = null;
public static void main(String[] args) throws ParameterException
{
CommandLineArgs cla = new CommandLineArgs();
RemoteArgs ra = new RemoteArgs();
new JCommander(Arrays.asList(cla, ra), args);
m_dontExit = ra.dontExit;
if (cla.port != null && ra.serPort != null) {
throw new TestNGException(
"Can only specify one of " + CommandLineArgs.PORT
+ " and " + RemoteArgs.PORT
);
}
m_debug = cla.debug;
m_ack = ra.ack;
if (m_debug) {
// while (true) {
initAndRun(args, cla, ra);
// }
} else {
initAndRun(args, cla, ra);
}
}
private static void initAndRun(String[] args, CommandLineArgs cla, RemoteArgs ra)
{
RemoteTestNG remoteTestNg = new RemoteTestNG();
if (m_debug) {
// In debug mode, override the port and the XML file to a fixed location
cla.port = Integer.parseInt(DEBUG_PORT);
ra.serPort = cla.port;
cla.suiteFiles = Arrays.asList(
new String[]{
DEBUG_SUITE_DIRECTORY + DEBUG_SUITE_FILE
}
);
}
remoteTestNg.configure(cla);
remoteTestNg.setHost(cla.host);
m_serPort = ra.serPort;
remoteTestNg.m_port = cla.port;
if (isVerbose()) {
StringBuilder sb = new StringBuilder("Invoked with ");
for (String s : args) {
sb.append(s).append(" ");
}
p(sb.toString());
// remoteTestNg.setVerbose(1);
// } else {
// remoteTestNg.setVerbose(0);
}
validateCommandLineParameters(cla);
remoteTestNg.run();
// if (m_debug) {
// // Run in a loop if in debug mode so it is possible to run several launches
// // without having to relauch RemoteTestNG.
// while (true) {
// remoteTestNg.run();
// remoteTestNg.configure(cla);
// }
// } else {
// remoteTestNg.run();
// }
}
private static void p(String s)
{
if (isVerbose()) {
System.out.println("[RemoteTestNG] " + s);
}
}
public static boolean isVerbose()
{
boolean result = System.getProperty(PROPERTY_VERBOSE) != null || isDebug();
return result;
}
public static boolean isDebug()
{
return m_debug || System.getProperty(PROPERTY_DEBUG) != null;
}
private void calculateAllSuites(List<XmlSuite> suites, List<XmlSuite> outSuites)
{
for (XmlSuite s : suites) {
outSuites.add(s);
// calculateAllSuites(s.getChildSuites(), outSuites);
}
}
@Override
public void run()
{
IMessageSender sender = m_serPort != null
? new SerializedMessageSender(m_host, m_serPort, m_ack)
: new StringMessageSender(m_host, m_port);
final MessageHub msh = new MessageHub(sender);
msh.setDebug(isDebug());
try {
msh.connect();
// We couldn't do this until now in debug mode since the .xml file didn't exist yet.
// Now that we have connected with the Eclipse client, we know that it created the .xml
// file so we can proceed with the initialization
initializeSuitesAndJarFile();
List<XmlSuite> suites = Lists.newArrayList();
calculateAllSuites(m_suites, suites);
// System.out.println("Suites: " + m_suites.get(0).getChildSuites().size()
// + " and:" + suites.get(0).getChildSuites().size());
if (suites.size() > 0) {
int testCount = 0;
for (int i = 0; i < suites.size(); i++) {
testCount += (suites.get(i)).getTests().size();
}
GenericMessage gm = new GenericMessage(MessageHelper.GENERIC_SUITE_COUNT);
gm.setSuiteCount(suites.size());
gm.setTestCount(testCount);
msh.sendMessage(gm);
addListener(new RemoteSuiteListener(msh));
setTestRunnerFactory(new DelegatingTestRunnerFactory(buildTestRunnerFactory(), msh));
// System.out.println("RemoteTestNG starting");
super.run();
} else {
System.err.println("No test suite found. Nothing to run");
}
}
catch (Throwable cause) {
cause.printStackTrace(System.err);
}
finally {
// System.out.println("RemoteTestNG finishing: " + (getEnd() - getStart()) + " ms");
msh.shutDown();
if (!m_debug && !m_dontExit) {
System.exit(0);
}
}
}
/**
* Override by the plugin if you need to configure differently the <code>TestRunner</code>
* (usually this is needed if different listeners/reporters are needed).
* <b>Note</b>: you don't need to worry about the wiring listener, because it is added
* automatically.
*/
protected ITestRunnerFactory buildTestRunnerFactory()
{
//################### PATCH STARTS
if (System.getProperty("testrunfactory") != null) {
m_customTestRunnerFactory = (ITestRunnerFactory) ClassHelper.newInstance(
ClassHelper.fileToClass(
System.getProperty(
"testrunfactory"
)
)
);
//################## PATCH ENDS
} else if (null == m_customTestRunnerFactory) {
m_customTestRunnerFactory = new ITestRunnerFactory()
{
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest xmlTest,
List<IInvokedMethodListener> listeners
)
{
TestRunner runner =
new TestRunner(
getConfiguration(), suite, xmlTest,
false /*skipFailedInvocationCounts */,
listeners
);
if (m_useDefaultListeners) {
runner.addListener(new TestHTMLReporter());
runner.addListener(new JUnitXMLReporter());
}
return runner;
}
};
}
return m_customTestRunnerFactory;
}
private String getHost()
{
return m_host;
}
public void setHost(String host)
{
m_host = defaultIfStringEmpty(host, LOCALHOST);
}
private int getPort()
{
return m_port;
}
/**
* A ISuiteListener wiring the results using the internal string-based protocol.
*/
private static class RemoteSuiteListener implements ISuiteListener
{
private final MessageHub m_messageSender;
RemoteSuiteListener(MessageHub smsh)
{
m_messageSender = smsh;
}
@Override
public void onFinish(ISuite suite)
{
m_messageSender.sendMessage(new SuiteMessage(suite, false /*start*/));
}
@Override
public void onStart(ISuite suite)
{
m_messageSender.sendMessage(new SuiteMessage(suite, true /*start*/));
}
}
private static class DelegatingTestRunnerFactory implements ITestRunnerFactory
{
private final ITestRunnerFactory m_delegateFactory;
private final MessageHub m_messageSender;
DelegatingTestRunnerFactory(ITestRunnerFactory trf, MessageHub smsh)
{
m_delegateFactory = trf;
m_messageSender = smsh;
}
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest test,
List<IInvokedMethodListener> listeners
)
{
TestRunner tr = m_delegateFactory.newTestRunner(suite, test, listeners);
tr.addListener(new RemoteTestListener(suite, test, m_messageSender));
return tr;
}
}
}

View File

@ -0,0 +1,85 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.clients.OverlordResourceTestClient;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.apache.commons.io.IOUtils;
import org.joda.time.Interval;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.concurrent.Callable;
public abstract class AbstractIndexerTest
{
@Inject
protected CoordinatorResourceTestClient coordinator;
@Inject
protected OverlordResourceTestClient indexer;
@Inject
protected ObjectMapper jsonMapper;
@Inject
protected FromFileTestQueryHelper queryHelper;
protected void unloadAndKillData(final String dataSource) throws Exception
{
Interval interval = new Interval("2013-01-01T00:00:00.000Z/2013-12-01T00:00:00.000Z");
coordinator.unloadSegmentsForDataSource(dataSource, interval);
RetryUtil.retryUntilFalse(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(dataSource);
}
}, "Segment Unloading"
);
coordinator.deleteSegmentsDataSource(dataSource, interval);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return (indexer.getPendingTasks().size() + indexer.getRunningTasks().size() + indexer.getWaitingTasks()
.size()) == 0;
}
}, "Waiting for Tasks Completion"
);
}
protected String getTaskAsString(String file) throws IOException
{
InputStream inputStream = ITRealtimeIndexTaskTest.class.getResourceAsStream(file);
StringWriter writer = new StringWriter();
IOUtils.copy(inputStream, writer, "UTF-8");
return writer.toString();
}
}

View File

@ -0,0 +1,81 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITIndexerTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITIndexerTest.class);
private static String INDEX_TASK = "/indexer/wikipedia_index_task.json";
private static String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static String INDEX_DATASOURCE = "wikipedia_index_test";
@Inject
private IntegrationTestingConfig config;
@Test
public void testIndexData() throws Exception
{
loadData();
try {
queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
Throwables.propagate(e);
}
finally {
unloadAndKillData(INDEX_DATASOURCE);
}
}
private void loadData() throws Exception
{
final String taskID = indexer.submitTask(getTaskAsString(INDEX_TASK));
LOG.info("TaskID for loading index task %s", taskID);
indexer.waitUntilTaskCompletes(taskID);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(INDEX_DATASOURCE);
}
}, "Segment Load"
);
}
}

View File

@ -0,0 +1,142 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import io.druid.curator.discovery.ServerDiscoveryFactory;
import io.druid.curator.discovery.ServerDiscoverySelector;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.clients.EventReceiverFirehoseTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import io.druid.testing.utils.ServerDiscoveryUtil;
import org.joda.time.DateTime;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
/**
* Steps
* 1) Submit a RealtimeIndexTask
* 2) Load Data using EventReceiverFirehose
* 3) Runs queries and verifies that the ingested data is available for queries
* 4) Waits for handover of the segment to historical node
* 5) Queries data from historical node and verifies handover
* 6) Removes and Delete the created Data Segment
*/
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITRealtimeIndexTaskTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITRealtimeIndexTaskTest.class);
private static final String REALTIME_TASK_RESOURCE = "/indexer/wikipedia_realtime_index_task.json";
private static final String EVENT_RECEIVER_SERVICE_NAME = "eventReceiverServiceName";
private static final String EVENT_DATA_FILE = "/indexer/wikipedia_index_data.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test";
@Inject
ServerDiscoveryFactory factory;
@Inject
@Global
HttpClient httpClient;
@Inject
IntegrationTestingConfig config;
@Test
public void testRealtimeIndexTask() throws Exception
{
try {
// the task will run for 3 minutes and then shutdown itself
String task = setShutOffTime(
getTaskAsString(REALTIME_TASK_RESOURCE),
new DateTime(System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(2))
);
String taskID = indexer.submitTask(task);
postEvents();
// sleep for a while to let the events ingested
TimeUnit.SECONDS.sleep(5);
// should hit the queries on realtime task
this.queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
// wait for the task to complete
indexer.waitUntilTaskCompletes(taskID);
// task should complete only after the segments are loaded by historical node
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(INDEX_DATASOURCE);
}
},
true,
60000,
10,
"Real-time generated segments loaded"
);
// run queries on historical nodes
this.queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
Throwables.propagate(e);
}
finally {
unloadAndKillData(INDEX_DATASOURCE);
}
}
private String setShutOffTime(String taskAsString, DateTime time)
{
return taskAsString.replace("#SHUTOFFTIME", time.toString());
}
public void postEvents() throws Exception
{
final ServerDiscoverySelector eventReceiverSelector = factory.createSelector(EVENT_RECEIVER_SERVICE_NAME);
eventReceiverSelector.start();
try {
ServerDiscoveryUtil.waitUntilInstanceReady(eventReceiverSelector, "Event Receiver");
// Access the docker VM mapped host and port instead of service announced in zookeeper
String host = config.getMiddleManagerHost() + ":" + eventReceiverSelector.pick().getPort();
LOG.info("Event Receiver Found at host %s", host);
EventReceiverFirehoseTestClient client = new EventReceiverFirehoseTestClient(
host,
EVENT_RECEIVER_SERVICE_NAME,
jsonMapper,
httpClient
);
client.postEventsFromFile(EVENT_DATA_FILE);
}
finally {
eventReceiverSelector.stop();
}
}
}

View File

@ -0,0 +1,172 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.beust.jcommander.internal.Lists;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import io.druid.curator.discovery.ServerDiscoveryFactory;
import io.druid.curator.discovery.ServerDiscoverySelector;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.clients.EventReceiverFirehoseTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import io.druid.testing.utils.ServerDiscoveryUtil;
import org.joda.time.DateTime;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITUnionQueryTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITUnionQueryTest.class);
private static final String REALTIME_TASK_RESOURCE = "/indexer/wikipedia_realtime_index_task.json";
private static final String EVENT_RECEIVER_SERVICE_PREFIX = "eventReceiverServiceName";
private static final String UNION_DATA_FILE = "/indexer/wikipedia_index_data.json";
private static final String UNION_QUERIES_RESOURCE = "/indexer/union_queries.json";
private static final String UNION_DATASOURCE = "wikipedia_index_test";
@Inject
ServerDiscoveryFactory factory;
@Inject
@Global
HttpClient httpClient;
@Inject
IntegrationTestingConfig config;
@Test
public void testRealtimeIndexTask() throws Exception
{
final int numTasks = 4;
try {
// Load 4 datasources with same dimensions
String task = setShutOffTime(
getTaskAsString(REALTIME_TASK_RESOURCE),
new DateTime(System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(3))
);
List<String> taskIDs = Lists.newArrayList();
for (int i = 0; i < numTasks; i++) {
taskIDs.add(
indexer.submitTask(
withServiceName(
withDataSource(task, UNION_DATASOURCE + i),
EVENT_RECEIVER_SERVICE_PREFIX + i
)
)
);
}
for (int i = 0; i < numTasks; i++) {
postEvents(i);
}
// sleep for a while to let the events ingested
TimeUnit.SECONDS.sleep(5);
// should hit the queries on realtime task
LOG.info("Running Union Queries..");
this.queryHelper.testQueriesFromFile(UNION_QUERIES_RESOURCE, 2);
// wait for the task to complete
for (int i = 0; i < numTasks; i++) {
indexer.waitUntilTaskCompletes(taskIDs.get(i));
}
// task should complete only after the segments are loaded by historical node
for (int i = 0; i < numTasks; i++) {
final int taskNum = i;
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(UNION_DATASOURCE + taskNum);
}
},
true,
60000,
10,
"Real-time generated segments loaded"
);
}
// run queries on historical nodes
this.queryHelper.testQueriesFromFile(UNION_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
for (int i = 0; i < numTasks; i++) {
unloadAndKillData(UNION_DATASOURCE + i);
}
}
}
private String setShutOffTime(String taskAsString, DateTime time)
{
return taskAsString.replace("#SHUTOFFTIME", time.toString());
}
private String withDataSource(String taskAsString, String dataSource)
{
return taskAsString.replace(UNION_DATASOURCE, dataSource);
}
private String withServiceName(String taskAsString, String serviceName)
{
return taskAsString.replace(EVENT_RECEIVER_SERVICE_PREFIX, serviceName);
}
public void postEvents(int id) throws Exception
{
final ServerDiscoverySelector eventReceiverSelector = factory.createSelector(EVENT_RECEIVER_SERVICE_PREFIX + id);
eventReceiverSelector.start();
try {
ServerDiscoveryUtil.waitUntilInstanceReady(eventReceiverSelector, "Event Receiver");
// Access the docker VM mapped host and port instead of service announced in zookeeper
String host = config.getMiddleManagerHost() + ":" + eventReceiverSelector.pick().getPort();
LOG.info("Event Receiver Found at host [%s]", host);
EventReceiverFirehoseTestClient client = new EventReceiverFirehoseTestClient(
host,
EVENT_RECEIVER_SERVICE_PREFIX + id,
jsonMapper,
httpClient
);
client.postEventsFromFile(UNION_DATA_FILE);
}
finally {
eventReceiverSelector.stop();
}
}
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.query;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITTwitterQueryTest
{
private static final String TWITTER_DATA_SOURCE = "twitterstream";
private static final String TWITTER_QUERIES_RESOURCE = "/queries/twitterstream_queries.json";
@Inject
CoordinatorResourceTestClient coordinatorClient;
@Inject
private FromFileTestQueryHelper queryHelper;
@BeforeMethod
public void before()
{
// ensure that the segments twitter segments are loaded completely
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinatorClient.areSegmentsLoaded(TWITTER_DATA_SOURCE);
}
}, "twitter segment load"
);
}
@Test
public void testQueriesFromFile() throws Exception
{
queryHelper.testQueriesFromFile(TWITTER_QUERIES_RESOURCE, 2);
}
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.query;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITWikipediaQueryTest
{
private static final String WIKIPEDIA_DATA_SOURCE = "wikipedia_editstream";
private static final String WIKIPEDIA_QUERIES_RESOURCE = "/queries/wikipedia_editstream_queries.json";
@Inject
private CoordinatorResourceTestClient coordinatorClient;
@Inject
private FromFileTestQueryHelper queryHelper;
@BeforeMethod
public void before()
{
// ensure that twitter segments are loaded completely
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinatorClient.areSegmentsLoaded(WIKIPEDIA_DATA_SOURCE);
}
}, "wikipedia segment load"
);
}
@Test
public void testQueriesFromFile() throws Exception
{
queryHelper.testQueriesFromFile(WIKIPEDIA_QUERIES_RESOURCE, 2);
}
}

View File

@ -0,0 +1,19 @@
{
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": "wikipedia_index_test",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
}

View File

@ -0,0 +1,19 @@
{
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": "wikipedia_reindex_test",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
}

View File

@ -0,0 +1,564 @@
[
{
"description": "timeseries, filtered, all aggs, all",
"query": {
"queryType": "timeseries",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"added": 2064.0,
"count": 8,
"delta": 748.0,
"deleted": 1316.0,
"rows": 8
}
}
]
},
{
"description": "topN, all aggs, page dim, uniques metric",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"dimension": "page",
"metric": "added",
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"added": 3620.0,
"count": 4,
"page": "Crimson Typhoon",
"delta": 3600.0,
"deleted": 20.0,
"rows": 4
},
{
"added": 1836.0,
"count": 4,
"page": "Striker Eureka",
"delta": 1320.0,
"deleted": 516.0,
"rows": 4
},
{
"added": 492.0,
"count": 4,
"page": "Cherno Alpha",
"delta": 444.0,
"deleted": 48.0,
"rows": 4
}
]
}
]
},
{
"description": "topN, all aggs, page dim, count metric, postAggs",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfAddedDeletedConst",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "added",
"fieldName": "added"
},
{
"type": "arithmetic",
"name": "",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "constant",
"name": "constant",
"value": 1000
}
]
}
]
}
],
"dimension": "page",
"metric": "added",
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"added": 3620.0,
"count": 4,
"page": "Crimson Typhoon",
"delta": 3600.0,
"deleted": 20.0,
"sumOfAddedDeletedConst": 4640.0,
"rows": 4
},
{
"added": 1836.0,
"count": 4,
"page": "Striker Eureka",
"delta": 1320.0,
"deleted": 516.0,
"sumOfAddedDeletedConst": 3352.0,
"rows": 4
},
{
"added": 492.0,
"count": 4,
"page": "Cherno Alpha",
"delta": 444.0,
"deleted": 48.0,
"sumOfAddedDeletedConst": 1540.0,
"rows": 4
}
]
}
]
},
{
"description": "topN, lexicographic, two aggs, language dim, postAggs",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimension": "language",
"metric": {
"type": "lexicographic",
"previousStop": "a"
},
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"sumOfRowsAndCount": 16.0,
"count": 8,
"language": "en",
"rows": 8
},
{
"sumOfRowsAndCount": 8.0,
"count": 4,
"language": "ja",
"rows": 4
},
{
"sumOfRowsAndCount": 8.0,
"count": 4,
"language": "ru",
"rows": 4
}
]
}
]
},
{
"description": "groupBy, two aggs, namespace dim, postAggs",
"query": {
"queryType": "groupBy",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimensions": ["namespace"],
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 16.0,
"count": 8,
"rows": 8,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 24.0,
"count": 12,
"rows": 12,
"namespace": "wikipedia"
}
}
]
},
{
"description": "groupBy, two aggs, namespace + robot dim, postAggs",
"query": {
"queryType": "groupBy",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimensions": ["namespace", "robot"],
"limitSpec": {
"type": "default",
"limit": 3,
"orderBy": ["robot", "namespace"]
},
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 8.0,
"count": 4,
"robot": "false",
"rows": 4,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 8.0,
"count": 4,
"robot": "true",
"rows": 4,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 24.0,
"count": 12,
"robot": "true",
"rows": 12,
"namespace": "wikipedia"
}
}
]
},
{
"query": {
"queryType": "search",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"granularity": "all",
"query": {
"type": "insensitive_contains",
"value": "ip"
},
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T00:00:00.000Z",
"result": [
{
"dimension": "user",
"value": "triplets"
},
{
"dimension": "namespace",
"value": "wikipedia"
}
]
}
]
},
{
"description": "timeboundary, 1 agg, union",
"query": {
"queryType": "timeBoundary",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"minTime": "2013-08-31T01:02:33.000Z",
"maxTime": "2013-08-31T12:41:27.000Z"
}
}
]
}
]

View File

@ -0,0 +1,121 @@
{
"query": {
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test"
]
},
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"pagingIdentifiers": {
"wikipedia_index_test0_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z": 0,
"wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z": 0,
"wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z": 0,
"wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z": 0
},
"events": [
{
"segmentId": "wikipedia_index_test0_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test0_2013-08-31T0com.metamx.common.ISE: one or more twitter queries failed0:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
}
]
}
}
]
}

View File

@ -0,0 +1,5 @@
{"timestamp": "2013-08-31T01:02:33Z", "page": "Gypsy Danger", "language" : "en", "user" : "nuclear", "unpatrolled" : "true", "newPage" : "true", "robot": "false", "anonymous": "false", "namespace":"article", "continent":"North America", "country":"United States", "region":"Bay Area", "city":"San Francisco", "added": 57, "deleted": 200, "delta": -143}
{"timestamp": "2013-08-31T03:32:45Z", "page": "Striker Eureka", "language" : "en", "user" : "speed", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Australia", "country":"Australia", "region":"Cantebury", "city":"Syndey", "added": 459, "deleted": 129, "delta": 330}
{"timestamp": "2013-08-31T07:11:21Z", "page": "Cherno Alpha", "language" : "ru", "user" : "masterYi", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"article", "continent":"Asia", "country":"Russia", "region":"Oblast", "city":"Moscow", "added": 123, "deleted": 12, "delta": 111}
{"timestamp": "2013-08-31T11:58:39Z", "page": "Crimson Typhoon", "language" : "zh", "user" : "triplets", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"China", "region":"Shanxi", "city":"Taiyuan", "added": 905, "deleted": 5, "delta": 900}
{"timestamp": "2013-08-31T12:41:27Z", "page": "Coyote Tango", "language" : "ja", "user" : "stringer", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"Japan", "region":"Kanto", "city":"Tokyo", "added": 1, "deleted": 10, "delta": -9}

View File

@ -0,0 +1,16 @@
[
{
"description": "timeseries, 1 agg, all",
"query":{
"queryType" : "timeBoundary",
"dataSource": "wikipedia_index_test"
},
"expectedResults":[ {
"timestamp" : "2013-08-31T01:02:33.000Z",
"result" : {
"minTime" : "2013-08-31T01:02:33.000Z",
"maxTime" : "2013-08-31T12:41:27.000Z"
}
} ]
}
]

View File

@ -0,0 +1,59 @@
{
"type": "index",
"spec": {
"dataSchema": {
"dataSource": "wikipedia_index_test",
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-01" ]
},
"parser": {
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous",
"namespace", "continent", "country", "region", "city"
]
}
}
}
},
"ioConfig": {
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/indexer",
"filter": "wikipedia_index_data.json"
}
},
"tuningConfig": {
"type": "index"
}
}
}

View File

@ -0,0 +1,71 @@
{
"type": "index_realtime",
"spec": {
"dataSchema": {
"dataSource": "wikipedia_index_test",
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second"
},
"parser": {
"type" : "map",
"parseSpec": {
"timestampSpec": {
"column": "timestamp",
"format": "iso"
},
"dimensionsSpec" : {
"dimensions": [
"page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous",
"namespace", "continent", "country", "region", "city"
]
}
}
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "timed",
"shutoffTime": "#SHUTOFFTIME",
"delegate": {
"type": "receiver",
"serviceName": "eventReceiverServiceName",
"bufferSize": 100000
}
}
},
"tuningConfig": {
"type": "realtime",
"maxRowsInMemory": 1,
"intermediatePersistPeriod": "PT1M",
"windowPeriod": "PT1M",
"rejectionPolicy": {
"type": "none"
}
}
}
}

Some files were not shown because too many files have changed in this diff Show More