Merge branch 'master' into ingestmetadata-query

This commit is contained in:
nishantmonu51 2015-01-15 18:00:31 +05:30
commit c7452b75f6
180 changed files with 11432 additions and 1469 deletions

.travis.yml Normal file
View File

@ -0,0 +1,7 @@
language: java
jdk:
- oraclejdk7
- oraclejdk8
sudo: false

View File

@ -20,157 +20,157 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<name>druid-common</name>
<description>druid-common</description>

<parent>
    <groupId>io.druid</groupId>
    <artifactId>druid</artifactId>
    <version>0.7.0-SNAPSHOT</version>
</parent>

<dependencies>
    <dependency>
        <groupId>com.metamx</groupId>
        <artifactId>java-util</artifactId>
    </dependency>
    <dependency>
        <groupId>io.druid</groupId>
        <artifactId>druid-api</artifactId>
    </dependency>
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-dbcp2</artifactId>
    </dependency>
    <dependency>
        <groupId>commons-pool</groupId>
        <artifactId>commons-pool</artifactId>
        <version>1.6</version>
    </dependency>
    <dependency>
        <groupId>org.skife.config</groupId>
        <artifactId>config-magic</artifactId>
    </dependency>
    <dependency>
        <groupId>org.hibernate</groupId>
        <artifactId>hibernate-validator</artifactId>
    </dependency>
    <dependency>
        <groupId>javax.validation</groupId>
        <artifactId>validation-api</artifactId>
    </dependency>
    <dependency>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.datatype</groupId>
        <artifactId>jackson-datatype-guava</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.datatype</groupId>
        <artifactId>jackson-datatype-joda</artifactId>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.dataformat</groupId>
        <artifactId>jackson-dataformat-smile</artifactId>
    </dependency>
    <dependency>
        <groupId>com.google.inject</groupId>
        <artifactId>guice</artifactId>
    </dependency>
    <dependency>
        <groupId>com.google.inject.extensions</groupId>
        <artifactId>guice-multibindings</artifactId>
    </dependency>
    <dependency>
        <groupId>org.jdbi</groupId>
        <artifactId>jdbi</artifactId>
    </dependency>
    <dependency>
        <groupId>joda-time</groupId>
        <artifactId>joda-time</artifactId>
    </dependency>
    <dependency>
        <groupId>com.google.code.findbugs</groupId>
        <artifactId>jsr305</artifactId>
    </dependency>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
    </dependency>

    <!-- Tests -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.easymock</groupId>
        <artifactId>easymock</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>com.metamx</groupId>
        <artifactId>java-util</artifactId>
        <type>test-jar</type>
        <scope>test</scope>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-source-plugin</artifactId>
            <executions>
                <execution>
                    <id>attach-sources</id>
                    <goals>
                        <goal>jar</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <plugin>
            <artifactId>maven-jar-plugin</artifactId>
            <executions>
                <execution>
                    <goals>
                        <goal>test-jar</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <archive>
                    <manifest>
                        <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
                        <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
    </plugins>
</build>
</project>

View File

@ -59,7 +59,8 @@ public class Execs
/**
* @param nameFormat nameformat for threadFactory
* @param capacity maximum capacity after which the executorService will block on accepting new tasks
*
* @return ExecutorService which blocks accepting new tasks when the capacity reached
*/
public static ExecutorService newBlockingSingleThreaded(final String nameFormat, final int capacity)
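The blocking-on-capacity behaviour described in this Javadoc can be illustrated with a plain `ThreadPoolExecutor` and a caller-blocks rejection handler. The sketch below is only an approximation of the idea; the class name, thread-factory details, and handler are illustrative assumptions, not Druid's actual `Execs` implementation:

```java
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BlockingSingleThreadedSketch
{
  // Single worker thread, bounded queue: execute() blocks once `capacity` tasks are queued.
  public static ExecutorService newBlockingSingleThreaded(final String nameFormat, final int capacity)
  {
    return new ThreadPoolExecutor(
        1, 1, 0L, TimeUnit.MILLISECONDS,
        new ArrayBlockingQueue<Runnable>(capacity),
        new ThreadFactory()
        {
          @Override
          public Thread newThread(Runnable runnable)
          {
            return new Thread(runnable, String.format(nameFormat, 0));
          }
        },
        new RejectedExecutionHandler()
        {
          @Override
          public void rejectedExecution(Runnable task, ThreadPoolExecutor executor)
          {
            try {
              // Instead of rejecting, block the submitting thread until queue space frees up.
              executor.getQueue().put(task);
            }
            catch (InterruptedException e) {
              Thread.currentThread().interrupt();
              throw new RejectedExecutionException(e);
            }
          }
        }
    );
  }

  public static void main(String[] args) throws InterruptedException
  {
    final ExecutorService exec = newBlockingSingleThreaded("blocking-demo-%d", 2);
    for (int i = 0; i < 5; i++) {
      final int task = i;
      exec.execute(new Runnable()
      {
        @Override
        public void run()
        {
          System.out.println("running task " + task);
        }
      });
    }
    exec.shutdown();
    exec.awaitTermination(10, TimeUnit.SECONDS);
  }
}
```

With a capacity of 2, submitting further tasks while the queue is full simply blocks the calling thread until the single worker drains a slot, rather than throwing or growing the queue without bound.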

View File

@ -0,0 +1,44 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline;

import io.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;

import java.util.List;

public interface TimelineLookup<VersionType, ObjectType>
{
/**
* Does a lookup for the objects representing the given time interval. Will *only* return
* PartitionHolders that are complete.
*
* @param interval interval to find objects for
*
* @return Holders representing the interval that the objects exist for, PartitionHolders
* are guaranteed to be complete
*/
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> lookup(Interval interval);
public PartitionHolder<ObjectType> findEntry(Interval interval, VersionType version);
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import io.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;

public class UnionTimeLineLookup<VersionType, ObjectType> implements TimelineLookup<VersionType, ObjectType>
{
Iterable<TimelineLookup<VersionType, ObjectType>> delegates;
public UnionTimeLineLookup(Iterable<TimelineLookup<VersionType, ObjectType>> delegates)
{
this.delegates = delegates;
}
@Override
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> lookup(final Interval interval)
{
return Iterables.concat(
Iterables.transform(
delegates,
new Function<TimelineLookup<VersionType, ObjectType>, Iterable<TimelineObjectHolder<VersionType, ObjectType>>>()
{
@Override
public Iterable<TimelineObjectHolder<VersionType, ObjectType>> apply(TimelineLookup<VersionType, ObjectType> input)
{
return input.lookup(interval);
}
}
)
);
}
public PartitionHolder<ObjectType> findEntry(Interval interval, VersionType version)
{
for (TimelineLookup<VersionType, ObjectType> delegate : delegates) {
final PartitionHolder<ObjectType> entry = delegate.findEntry(interval, version);
if (entry != null) {
return entry;
}
}
return null;
}
}
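For reference, here is a standalone sketch of the same union-of-delegates pattern that `UnionTimeLineLookup` applies: `lookup` concatenates the results of every delegate, while `findEntry` returns the first non-null match. The `Lookup` interface and the string payloads below are hypothetical stand-ins for Druid's `TimelineLookup`, `TimelineObjectHolder`, and `PartitionHolder` types, not part of the actual API:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class UnionLookupSketch
{
  // Hypothetical stand-in for TimelineLookup: a key maps to zero or more results.
  interface Lookup<K, V>
  {
    List<V> lookup(K key);

    V findEntry(K key);
  }

  // Union over delegates: lookup() concatenates every delegate's results,
  // findEntry() returns the first non-null match, mirroring UnionTimeLineLookup.
  static <K, V> Lookup<K, V> union(final List<Lookup<K, V>> delegates)
  {
    return new Lookup<K, V>()
    {
      @Override
      public List<V> lookup(K key)
      {
        List<V> results = new ArrayList<V>();
        for (Lookup<K, V> delegate : delegates) {
          results.addAll(delegate.lookup(key));
        }
        return results;
      }

      @Override
      public V findEntry(K key)
      {
        for (Lookup<K, V> delegate : delegates) {
          V entry = delegate.findEntry(key);
          if (entry != null) {
            return entry;
          }
        }
        return null;
      }
    };
  }

  // Tiny delegate whose results are tagged with a name, just for the demo.
  static Lookup<String, String> constant(final String name)
  {
    return new Lookup<String, String>()
    {
      @Override
      public List<String> lookup(String key)
      {
        return Arrays.asList(name + ":" + key);
      }

      @Override
      public String findEntry(String key)
      {
        return name + ":" + key;
      }
    };
  }

  public static void main(String[] args)
  {
    Lookup<String, String> union = union(Arrays.asList(constant("first"), constant("second")));
    System.out.println(union.lookup("x"));     // [first:x, second:x]
    System.out.println(union.findEntry("x"));  // first:x (first non-null match wins)
  }
}
```

Note that the real implementation stays lazy (`Iterables.concat`/`Iterables.transform`), so delegates are only queried as the result is iterated; the eager list above is used only for brevity.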

View File

@ -58,7 +58,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
* to achieve "atomic" updates. First add new items, then check if those items caused anything to be overshadowed, if
* so, remove the overshadowed elements and you have effectively updated your data set without any user impact.
*/
public class VersionedIntervalTimeline<VersionType, ObjectType>
public class VersionedIntervalTimeline<VersionType, ObjectType> implements TimelineLookup<VersionType, ObjectType>
{
private static final Logger log = new Logger(VersionedIntervalTimeline.class);
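The "atomic" add-then-prune update described in the class Javadoc can be mimicked with a much simpler structure. The sketch below is a hypothetical single-interval illustration (it ignores intervals, partitions, and completeness entirely) of adding a new version first and only then removing whatever it overshadows, so readers never observe a gap:

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class OvershadowUpdateSketch
{
  // Hypothetical single-interval stand-in: payloads keyed by version number.
  private final Map<Integer, String> entries = new HashMap<Integer, String>();

  // Two-phase update: add the new version first, then prune whatever it overshadows,
  // so a concurrent reader sees either the old or the new entry, never a hole.
  public synchronized void addAndPruneOvershadowed(int version, String payload)
  {
    entries.put(version, payload);

    int max = Collections.max(entries.keySet());
    for (Iterator<Integer> it = entries.keySet().iterator(); it.hasNext(); ) {
      if (it.next() < max) {
        it.remove();
      }
    }
  }

  public synchronized String lookup()
  {
    // Only the highest (non-overshadowed) version is ever visible.
    return entries.isEmpty() ? null : entries.get(Collections.max(entries.keySet()));
  }

  public static void main(String[] args)
  {
    OvershadowUpdateSketch timeline = new OvershadowUpdateSketch();
    timeline.addAndPruneOvershadowed(1, "v1 segment");
    timeline.addAndPruneOvershadowed(2, "v2 segment");   // overshadows and removes v1
    System.out.println(timeline.lookup());               // v2 segment
  }
}
```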

View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
~ Druid - a distributed column store.
~ Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
~
~ This program is free software; you can redistribute it and/or
~ modify it under the terms of the GNU General Public License
~ as published by the Free Software Foundation; either version 2
~ of the License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU General Public License for more details.
~
~ You should have received a copy of the GNU General Public License
~ along with this program; if not, write to the Free Software
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<appender name="ConsoleAppender" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{ISO8601} %p [%t] %c - %m%n"/>
</layout>
</appender>
<root>
<priority value ="info" />
<appender-ref ref="ConsoleAppender"/>
</root>
</log4j:configuration>

View File

@ -10,7 +10,7 @@
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/druid-io/druid/wiki">Documentation</a></li>
</ul>
</div>
</div>

View File

@ -88,10 +88,27 @@ The spec\_file is a path to a file that contains JSON and an example looks like:
},
"tuningConfig" : {
"type" : "hadoop",
"targetPartitionSize" : 5000000,
"jobProperties": {
"mapreduce.job.queuename": "default"
}
"workingPath": "/tmp",
"partitionsSpec" : {
"type" : "dimension",
"partitionDimension" : null,
"targetPartitionSize" : 5000000,
"maxPartitionSize" : 7500000,
"assumeGrouped" : false,
"numShards" : -1
},
"shardSpecs" : { },
"leaveIntermediate" : false,
"cleanupOnFailure" : true,
"overwriteFiles" : false,
"ignoreInvalidRows" : false,
"jobProperties" : { },
"combineText" : false,
"persistInHeap" : false,
"ingestOffheap" : false,
"bufferSize" : 134217728,
"aggregationBufferRatio" : 0.5,
"rowFlushBoundary" : 300000
}
}
```

View File

@ -25,7 +25,7 @@ Clone the code from [https://github.com/druid-io/whirr](https://github.com/druid
git checkout trunk
mvn clean install -Dmaven.test.failure.ignore=true
In order to run the test below, you'll also need two files that are available only from a [standard install of Druid](http://druid.io/downloads.html) or the [Druid repo](https://github.com/metamx/druid/tree/master/examples/bin/examples):
In order to run the test below, you'll also need two files that are available only from a [standard install of Druid](http://druid.io/downloads.html) or the [Druid repo](https://github.com/druid-io/druid/tree/master/examples/bin/examples):
* `druid/examples/bin/examples/wikipedia/wikipedia_realtime.spec`
* `druid/examples/bin/examples/indexing/wikipedia_realtime_task.json`

View File

@ -8,34 +8,93 @@ For general Broker Node information, see [here](Broker.html).
Runtime Configuration
---------------------
The broker module uses several of the default modules in [Configuration](Configuration.html) and has the following set of configurations as well:
The broker node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Query Configs
#### Query Prioritization
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.broker.balancer.type`|`random`, `connectionCount`|Determines how the broker balances connections to historical nodes. `random` chooses randomly; `connectionCount` picks the node with the fewest active connections.|`random`|
|`druid.broker.select.tier`|`highestPriority`, `lowestPriority`, `custom`|If segments are cross-replicated across tiers in a cluster, you can tell the broker to prefer to select segments in a tier with a certain priority.|`highestPriority`|
|`druid.broker.select.tier.custom.priorities`|`An array of integer priorities.`|Select servers in tiers with a custom priority list.|None|
|`druid.broker.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.broker.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
|`druid.broker.cache.numBackgroundThreads`|Non-negative integer|Number of background threads in the thread pool to use for eventual-consistency caching of results if caching is used. It is recommended to set this value greater than or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Setups that use a Druid backend programmatically (sub-second re-querying) should consider setting this to 0 to prevent eventual consistency from hurting overall performance; if this is you, please experiment to find out what setting works best.|`0`|
#### Concurrent Requests
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
|`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time nodes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|5|
|`druid.broker.http.readTimeout`|The timeout for data reads.|PT15M|
#### Processing
The broker only uses processing configs for nested groupBy queries.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
#### General Query Configuration
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.chunkPeriod`|Long-interval queries (of any type) may be broken into shorter interval queries, reducing the impact on resources. Use ISO 8601 periods. For example, if this property is set to `P1M` (one month), then a query covering a year would be broken into 12 smaller queries. |0 (off)|
##### GroupBy Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
##### Search Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
### Caching
You can optionally enable caching on the broker alone by setting the caching configs here.
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.broker.cache.useCache`|Enable the cache on the broker.|false|
|`druid.broker.cache.populateCache`|Populate the cache on the broker.|false|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.broker.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.broker.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
|`druid.broker.cache.numBackgroundThreads`|Number of background threads in the thread pool to use for eventual-consistency caching of results if caching is used. It is recommended to set this value greater than or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Setups that use a Druid backend programmatically (sub-second re-querying) should consider setting this to 0 to prevent eventual consistency from hurting overall performance; if this is you, please experiment to find out what setting works best.|`0`|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.broker.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.broker.cache.hosts`|Comma separated list of Memcached hosts `<host:port>`.|none|
|`druid.broker.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.broker.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Comma-separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|

View File

@ -6,7 +6,7 @@ layout: doc_page
The other way to setup Druid is from source via git. To do so, run these commands:
```
git clone git@github.com:metamx/druid.git
git clone git@github.com:druid-io/druid.git
cd druid
./build.sh
```

View File

@ -4,7 +4,7 @@ layout: doc_page
# Configuring Druid
This describes the basic server configuration that is loaded by all Druid server processes; the same file is loaded by all. See also the JSON "specFile" descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).
This describes the common configuration shared by all Druid nodes. These configurations can be defined in the `common.runtime.properties` file.
## JVM Configuration Best Practices
@ -14,51 +14,17 @@ There are three JVM parameters that we set on all of our processes:
2. `-Dfile.encoding=UTF-8` This is similar to the timezone setting; we test assuming UTF-8. Local encodings might work, but they also might result in weird and interesting bugs.
3. `-Djava.io.tmpdir=<a path>` Various parts of the system that interact with the file system do it via temporary files, and these files can get somewhat large. Many production systems are set up to have small (but fast) `/tmp` directories, which can be problematic with Druid, so we recommend pointing the JVM's tmp directory to something with a little more meat.
## Modules
### Extensions
As of Druid v0.6, most core Druid functionality has been compartmentalized into modules. There are a set of default modules that may apply to any node type, and there are specific modules for the different node types. Default modules are __lazily instantiated__. Each module has its own set of configuration.
This page describes the configuration of the default modules. Node-specific configuration is discussed on each node's respective page. In addition, you can add custom modules to [extend Druid](Modules.html).
Configuration of the various modules is done via Java properties. These can either be provided as `-D` system properties on the java command line or they can be passed in via a file called `runtime.properties` that exists on the classpath.
Note: as a future item, we'd like to consolidate all of the various configuration into a yaml/JSON based configuration file.
### Emitter Module
The Druid servers emit various metrics and alerts via something we call an Emitter. There are two emitter implementations included with the code, one that just logs to log4j ("logging", which is used by default if no emitter is specified) and one that does POSTs of JSON events to a server ("http"). The properties for using the logging emitter are described below.
Many of Druid's external dependencies can be plugged in as modules. Extensions can be provided using the following configs:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter`|Setting this value to either "logging" or "http" will instantiate one of the emitter modules.|logging|
|`druid.extensions.remoteRepositories`|If this is not set to '[]', Druid will try to download extensions at the specified remote repository.|["http://repo1.maven.org/maven2/","https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]|
|`druid.extensions.localRepository`|The local maven directory where extensions are installed. If this is set, remoteRepositories is not required.|[]|
|`druid.extensions.coordinates`|The list of extensions to include.|[]|
#### Logging Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter.logging.loggerClass`|Choices: HttpPostEmitter, LoggingEmitter, NoopServiceEmitter, ServiceEmitter. The class used for logging.|LoggingEmitter|
|`druid.emitter.logging.logLevel`|Choices: debug, info, warn, error. The log level at which message are logged.|info|
#### Http Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.emitter.http.timeOut`|The timeout for data reads.|PT5M|
|`druid.emitter.http.flushMillis`|How often the internal message buffer is flushed (data is sent).|60000|
|`druid.emitter.http.flushCount`|How many messages can the internal message buffer hold before flushing (sending).|500|
|`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none|
### Http Client Module
This is the HTTP client used by [Broker](Broker.html) nodes.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time nodes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|5|
|`druid.broker.http.readTimeout`|The timeout for data reads.|PT15M|
### Curator Module
### Zookeeper
Druid uses [Curator](http://curator.incubator.apache.org/) for all [Zookeeper](http://zookeeper.apache.org/) interactions.
@ -68,110 +34,36 @@ Druid uses [Curator](http://curator.incubator.apache.org/) for all [Zookeeper](h
|`druid.zk.service.sessionTimeoutMs`|ZooKeeper session timeout, in milliseconds.|30000|
|`druid.curator.compress`|Boolean flag for whether or not created Znodes should be compressed.|false|
### Announcer Module
The announcer module is used to announce and unannounce Znodes in ZooKeeper (using Curator).
#### ZooKeeper Paths
See [ZooKeeper](ZooKeeper.html).
#### Data Segment Announcer
Data segment announcers are used to announce segments.
We recommend just setting the base ZK path, but all ZK paths that Druid uses can be overwritten.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.type`|Choices: legacy or batch. The type of data segment announcer to use.|batch|
|`druid.zk.paths.base`|Base Zookeeper path.|druid|
|`druid.zk.paths.propertiesPath`|Zookeeper properties path.|druid/properties|
|`druid.zk.paths.announcementsPath`|Druid node announcement path.|druid/announcements|
|`druid.zk.paths.liveSegmentsPath`|Current path for where Druid nodes announce their segments.|druid/segments|
|`druid.zk.paths.loadQueuePath`|Entries here cause historical nodes to load and drop segments.|druid/loadQueue|
|`druid.zk.paths.coordinatorPath`|Used by the coordinator for leader election.|druid/coordinator|
|`druid.zk.paths.servedSegmentsPath`|@Deprecated. Legacy path for where Druid nodes announce their segments.|druid/servedSegments|
##### Single Data Segment Announcer
In legacy Druid, each segment served by a node would be announced as an individual Znode.
##### Batch Data Segment Announcer
In current Druid, multiple data segments may be announced under the same Znode.
The indexing service also uses its own set of paths. These configs can be included in the common configuration.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.segmentsPerNode`|Each Znode contains info for up to this many segments.|50|
|`druid.announcer.maxBytesPerNode`|Max byte size for Znode.|524288|
|`druid.zk.paths.indexer.announcementsPath`|Middle managers announce themselves here.|druid/indexer/announcements|
|`druid.zk.paths.indexer.tasksPath`|Used to assign tasks to middle managers.|druid/indexer/tasks|
|`druid.zk.paths.indexer.statusPath`|Parent path for announcement of task statuses.|druid/indexer/status|
|`druid.zk.paths.indexer.leaderLatchPath`|Used for Overlord leader election.|druid/indexer/leaderLatchPath|
### Druid Processing Module
This module contains query processing functionality.
The following path is used for service discovery.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|/druid/discovery|
### Request Logging
### Metrics Module
The metrics module is used to track Druid metrics.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.monitoring.emissionPeriod`|How often metrics are emitted.|PT1m|
|`druid.monitoring.monitors`|Sets list of Druid monitors used by a node. Each monitor is specified as `com.metamx.metrics.<monitor-name>` (see below for names and more information). For example, you can specify monitors for a Broker with `druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]`.|none (no monitors)|
The following monitors are available:
* CacheMonitor &ndash; Emits metrics (to logs) about the segment results cache for Historical and Broker nodes. Reports typical cache statistics including hits, misses, rates, and size (bytes and number of entries), as well as timeouts and errors.
* SysMonitor &ndash; This uses the [SIGAR library](http://www.hyperic.com/products/sigar) to report on various system activities and statuses.
* ServerMonitor &ndash; Reports statistics on Historical nodes.
* JvmMonitor &ndash; Reports JVM-related statistics.
* RealtimeMetricsMonitor &ndash; Reports statistics on Realtime nodes.
### Server Module
This module is used for Druid server nodes.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Storage Node Module
This module is used by nodes that store data (Historical and Realtime).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.maxSize`|The maximum number of bytes-worth of segments that the node wants assigned to it. This is not a limit that Historical nodes actually enforce, just a value published to the Coordinator node so it can plan accordingly.|0|
|`druid.server.tier`| A string to name the distribution tier that the storage node belongs to. Many of the [rules Coordinator nodes use](Rule-Configuration.html) to manage segments can be keyed on tiers. | `_default_tier` |
|`druid.server.priority`|In a tiered architecture, the priority of the tier, thus allowing control over which nodes are queried. Higher numbers mean higher priority. The default (no priority) works for architecture with no cross replication (tiers that have no data-storage overlap). Data centers typically have equal priority. | 0 |
#### Segment Cache
Druid storage nodes maintain information about segments they have already downloaded, and a disk cache to store that data.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Segments assigned to a Historical node are first stored on the local file system (in a disk cache) and then served by the Historical node. These locations define where that local cache resides. | none (no caching) |
|`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true|
|`druid.segmentCache.dropSegmentDelayMillis`|How long a node delays before completely dropping segment.|30000 (30 seconds)|
|`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the Coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir|
|`druid.segmentCache.announceIntervalMillis`|How frequently to announce segments while segments are loading from cache. Set this value to zero to wait for all segments to be loaded before announcing.|5000 (5 seconds)|
|`druid.segmentCache.numLoadingThreads`|How many segments to load concurrently from deep storage.|1|
### Jetty Server Module
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
### Queryable Module
This module is used by all nodes that can serve queries.
All nodes that can serve queries can also log the requests they see.
|Property|Description|Default|
|--------|-----------|-------|
@ -193,58 +85,54 @@ Every request is emitted to some external location.
|--------|-----------|-------|
|`druid.request.logging.feed`|Feed name for requests.|none|
### Query Runner Factory Module
### Enabling Metrics
This module is required by nodes that can serve queries.
Druid nodes periodically emit metrics and different metrics monitors can be included. Each node can overwrite the default list of monitors.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.chunkPeriod`|Long-interval queries (of any type) may be broken into shorter interval queries, reducing the impact on resources. Use ISO 8601 periods. For example, if this property is set to `P1M` (one month), then a query covering a year would be broken into 12 smaller queries. |0 (off)|
|`druid.monitoring.emissionPeriod`|How often metrics are emitted.|PT1m|
|`druid.monitoring.monitors`|Sets list of Druid monitors used by a node. Each monitor is specified as `com.metamx.metrics.<monitor-name>` (see below for names and more information). For example, you can specify monitors for a Broker with `druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]`.|none (no monitors)|
#### GroupBy Query Config
The following monitors are available:
* CacheMonitor &ndash; Emits metrics (to logs) about the segment results cache for Historical and Broker nodes. Reports typical cache statistics including hits, misses, rates, and size (bytes and number of entries), as well as timeouts and errors.
* SysMonitor &ndash; This uses the [SIGAR library](http://www.hyperic.com/products/sigar) to report on various system activities and statuses.
* ServerMonitor &ndash; Reports statistics on Historical nodes.
* JvmMonitor &ndash; Reports JVM-related statistics.
* RealtimeMetricsMonitor &ndash; Reports statistics on Realtime nodes.
### Emitting Metrics
The Druid servers emit various metrics and alerts via something we call an Emitter. There are three emitter implementations included with the code: a "noop" emitter, one that just logs to log4j ("logging", used by default if no emitter is specified), and one that POSTs JSON events to a server ("http"). The properties for using the logging emitter are described below.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
|`druid.emitter`|Setting this value to "noop", "logging", or "http" will instantiate one of the emitter modules.|logging|
#### Search Query Config
#### Logging Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
|`druid.emitter.logging.loggerClass`|Choices: HttpPostEmitter, LoggingEmitter, NoopServiceEmitter, ServiceEmitter. The class used for logging.|LoggingEmitter|
|`druid.emitter.logging.logLevel`|Choices: debug, info, warn, error. The log level at which message are logged.|info|
### Discovery Module
The discovery module is used for service discovery.
#### Http Emitter Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.discovery.curator.path`|Services announce themselves under this ZooKeeper path.|/druid/discovery|
|`druid.emitter.http.timeOut`|The timeout for data reads.|PT5M|
|`druid.emitter.http.flushMillis`|How often the internal message buffer is flushed (data is sent).|60000|
|`druid.emitter.http.flushCount`|How many messages can the internal message buffer hold before flushing (sending).|500|
|`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none|
### Metadata Storage
#### Indexing Service Discovery Module
This module is used to find the [Indexing Service](Indexing-Service.html) using Curator service discovery.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.selectors.indexing.serviceName`|The druid.service name of the indexing service Overlord node. To start the Overlord with a different name, set it with this property. |overlord|
### Server Inventory View Module
This module is used to read announcements of segments in ZooKeeper. The configs are identical to the Announcer Module.
### Database Connector Module
These properties specify the jdbc connection and other configuration around the database. The only processes that connect to the DB with these properties are the [Coordinator](Coordinator.html) and [Indexing service](Indexing-service.html). This is tested on metadata storage.
These properties specify the jdbc connection and other configuration around the metadata storage. The only processes that connect to the metadata storage with these properties are the [Coordinator](Coordinator.html) and [Indexing service](Indexing-service.html).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.metadata.storage.type`|The type of metadata storage to use. Choose from "mysql", "postgres", or "derby".|derby|
|`druid.metadata.storage.connector.user`|The username to connect with.|none|
|`druid.metadata.storage.connector.password`|The password to connect with.|none|
|`druid.metadata.storage.connector.createTables`|If Druid requires a table and it doesn't exist, create it?|true|
@ -258,18 +146,9 @@ These properties specify the jdbc connection and other configuration around the
|`druid.metadata.storage.tables.taskLog`|Used by the indexing service to store task logs.|druid_taskLog|
|`druid.metadata.storage.tables.taskLock`|Used by the indexing service to store task locks.|druid_taskLock|
### Jackson Config Manager Module
### Deep Storage
The Jackson Config manager reads and writes config entries from the Druid config table using [Jackson](http://jackson.codehaus.org/).
|Property|Description|Default|
|--------|-----------|-------|
|`druid.manager.config.pollDuration`|How often the manager polls the config table for updates.|PT1m|
### DataSegment Pusher/Puller Module
This module is used to configure Druid deep storage. The configurations concern how to push and pull [Segments](Segments.html) from deep storage.
The configurations concern how to push and pull [Segments](Segments.html) from deep storage.
|Property|Description|Default|
|--------|-----------|-------|
@ -293,22 +172,14 @@ This deep storage is used to interface with Amazon's S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.s3.accessKey`|The access key to use to access S3.|none|
|`druid.s3.secretKey`|The secret key to use to access S3.|none|
|`druid.storage.bucket`|S3 bucket name.|none|
|`druid.storage.baseKey`|S3 object key prefix for storage.|none|
|`druid.storage.disableAcl`|Boolean flag for ACL.|false|
|`druid.storage.archiveBucket`|S3 bucket name for archiving when running the indexing-service *archive task*.|none|
|`druid.storage.archiveBaseKey`|S3 object key prefix for archiving.|none|
#### AWS Module
This module is used to interact with S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.s3.accessKey`|The access key to use to access S3.|none|
|`druid.s3.secretKey`|The secret key to use to access S3.|none|
#### HDFS Deep Storage
This deep storage is used to interface with HDFS.
@ -326,35 +197,62 @@ This deep storage is used to interface with Cassandra.
|`druid.storage.host`|Cassandra host.|none|
|`druid.storage.keyspace`|Cassandra key space.|none|
### Task Log Module
### Caching
This module is used to configure the [Indexing Service](Indexing-Service.html) task logs.
If you are using a distributed cache such as memcached, you can include the configuration here.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.type`|Choices: noop, s3, file. Where to store task logs.|file|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### File Task Logs
Store task logs in the local filesystem.
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|Local filesystem path.|log|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### S3 Task Logs
Store task logs in S3.
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.s3Bucket`|S3 bucket name.|none|
|`druid.indexer.logs.s3Prefix`|S3 key prefix.|none|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Comma-separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
#### Noop Task Logs
### Indexing Service Discovery
No task logs are actually stored.
This config is used to find the [Indexing Service](Indexing-Service.html) using Curator service discovery. Only required if you are actually running an indexing service.
### Firehose Module
|Property|Description|Default|
|--------|-----------|-------|
|`druid.selectors.indexing.serviceName`|The druid.service name of the indexing service Overlord node. To start the Overlord with a different name, set it with this property. |overlord|
The Firehose module lists all available firehoses. There are no configurations.
### Announcing Segments
You can optionally configure how to announce and unannounce Znodes in ZooKeeper (using Curator). For normal operations you do not need to override any of these configs.
#### Data Segment Announcer
Data segment announcers are used to announce segments.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.type`|Choices: legacy or batch. The type of data segment announcer to use.|batch|
##### Single Data Segment Announcer
In legacy Druid, each segment served by a node would be announced as an individual Znode.
##### Batch Data Segment Announcer
In current Druid, multiple data segments may be announced under the same Znode.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.announcer.segmentsPerNode`|Each Znode contains info for up to this many segments.|50|
|`druid.announcer.maxBytesPerNode`|Max byte size for Znode.|524288|

View File

@ -8,16 +8,32 @@ For general Coordinator Node information, see [here](Coordinator.html).
Runtime Configuration
---------------------
The coordinator module uses several of the default modules in [Configuration](Configuration.html) and has the following set of configurations as well:
The coordinator node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Node Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### Coordinator Operation
|Property|Description|Default|
|--------|-----------|-------|
|`druid.coordinator.period`|The run period for the coordinator. The coordinator operates by maintaining the current state of the world in memory and periodically looking at the set of segments available and segments being served to make decisions about whether any changes need to be made to the data topology. This property sets the delay between each of these runs.|PT60S|
|`druid.coordinator.period.indexingPeriod`|How often to send indexing tasks to the indexing service. Only applies if merge or conversion is turned on.|PT1800S (30 mins)|
|`druid.coordinator.startDelay`|The operation of the Coordinator works on the assumption that it has an up-to-date view of the state of the world when it runs; the current ZK interaction code, however, is written in a way that doesn't allow the Coordinator to know for a fact that it's done loading the current state of the world. This delay is a hack to give it enough time to believe that it has all the data.|PT300S|
|`druid.coordinator.merge.on`|Boolean flag for whether or not the coordinator should try and merge small segments into a more optimal segment size.|PT300S|
|`druid.coordinator.merge.on`|Boolean flag for whether or not the coordinator should try and merge small segments into a more optimal segment size.|false|
|`druid.coordinator.conversion.on`|Boolean flag for converting old segment indexing versions to the latest segment indexing version.|false|
|`druid.coordinator.load.timeout`|The timeout duration for when the coordinator assigns a segment to a historical node.|15 minutes|
|`druid.coordinator.load.timeout`|The timeout duration for when the coordinator assigns a segment to a historical node.|PT15M|
### Metadata Retrieval
|Property|Description|Default|
|--------|-----------|-------|
|`druid.manager.config.pollDuration`|How often the manager polls the config table for updates.|PT1m|
|`druid.manager.segment.pollDuration`|The duration between polls the Coordinator does for updates to the set of active segments. Generally defines the amount of lag time it can take for the coordinator to notice new segments.|PT1M|
|`druid.manager.rules.pollDuration`|The duration between polls the Coordinator does for updates to the set of active rules. Generally defines the amount of lag time it can take for the coordinator to notice rules.|PT1M|
|`druid.manager.rules.defaultTier`|The default tier from which default rules will be loaded.|_default|

View File

@ -4,7 +4,7 @@ layout: doc_page
Examples
========
The examples on this page are set up to give you a feel for what Druid does in practice. They are quick demos of Druid based on [CliRealtimeExample](https://github.com/metamx/druid/blob/master/services/src/main/java/io/druid/cli/CliRealtimeExample.java). While you wouldn't run it this way in production, you should be able to see how ingestion works and the kind of exploratory queries that are possible. Everything that can be done on your box here can be scaled out to tens of billions of events and terabytes of data per day in a production cluster while still giving snappy, responsive exploratory queries.
The examples on this page are set up to give you a feel for what Druid does in practice. They are quick demos of Druid based on [CliRealtimeExample](https://github.com/druid-io/druid/blob/master/services/src/main/java/io/druid/cli/CliRealtimeExample.java). While you wouldn't run it this way in production, you should be able to see how ingestion works and the kind of exploratory queries that are possible. Everything that can be done on your box here can be scaled out to tens of billions of events and terabytes of data per day in a production cluster while still giving snappy, responsive exploratory queries.
Installing Standalone Druid
---------------------------
@ -16,7 +16,7 @@ There are two options for installing standalone Druid. Building from source, and
Clone Druid and build it:
``` bash
git clone https://github.com/metamx/druid.git druid
git clone https://github.com/druid-io/druid.git druid
cd druid
git fetch --tags
git checkout druid-0.6.160
@ -37,7 +37,7 @@ Twitter Example
For a full tutorial based on the twitter example, check out this [Twitter Tutorial](Twitter-Tutorial.html).
This example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/metamx/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
This example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/druid-io/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
### What you'll learn
* See how large amounts of data gets ingested into Druid in real-time

View File

@ -5,18 +5,98 @@ Historical Node Configuration
=============================
For general Historical Node information, see [here](Historical.html).
Runtime Configuration
---------------------
The historical module uses several of the default modules in [Configuration](Configuration.html) and has a few configs of its own.
The historical node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
#### Local Cache
### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
### General Configuration
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.tier`| A string to name the distribution tier that the storage node belongs to. Many of the [rules Coordinator nodes use](Rule-Configuration.html) to manage segments can be keyed on tiers. | `_default_tier` |
|`druid.server.priority`|In a tiered architecture, the priority of the tier, thus allowing control over which nodes are queried. Higher numbers mean higher priority. The default (no priority) works for architecture with no cross replication (tiers that have no data-storage overlap). Data centers typically have equal priority. | 0 |
### Storing Segments
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Segments assigned to a Historical node are first stored on the local file system (in a disk cache) and then served by the Historical node. These locations define where that local cache resides. | none (no caching) |
|`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true|
|`druid.segmentCache.dropSegmentDelayMillis`|How long a node delays before completely dropping segment.|30000 (30 seconds)|
|`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the Coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir|
|`druid.segmentCache.announceIntervalMillis`|How frequently to announce segments while segments are loading from cache. Set this value to zero to wait for all segments to be loaded before announcing.|5000 (5 seconds)|
|`druid.segmentCache.numLoadingThreads`|How many segments to load concurrently from deep storage.|1|
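A minimal segment cache configuration might look like the following; the path and maxSize are illustrative and should match the disk you actually provision:

```
# Illustrative path and size (300GB)
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 300000000000}]
druid.segmentCache.deleteOnRemove=true
druid.segmentCache.numLoadingThreads=2
```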
### Query Configs
#### Concurrent Requests
Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.http.numThreads`|Number of threads for HTTP requests.|10|
|`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m|
#### Processing
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
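Putting the HTTP and processing configs together, a historical node on a hypothetical 8-core machine might use something like the following; the buffer size and thread counts are illustrative and follow the `num_cores - 1` rule of thumb:

```
# Assuming an 8-core box; adjust to your hardware
druid.server.http.numThreads=10
druid.processing.buffer.sizeBytes=1073741824
druid.processing.numThreads=7
```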
#### General Query Configuration
##### GroupBy Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
##### Search Query Config
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
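Written out as runtime properties, the documented defaults for these query configs look like this:

```
druid.query.groupBy.singleThreaded=false
druid.query.groupBy.maxIntermediateRows=50000
druid.query.groupBy.maxResults=500000
druid.query.search.maxSearchLimit=1000
```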
### Caching
You can optionally configure caching to be enabled on just the historical nodes by setting the caching configs here.
|Property|Possible Values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.historical.cache.useCache`|`true`,`false`|Allow cache to be used. Cache will NOT be used unless this is set.|`false`|
|`druid.historical.cache.populateCache`|`true`,`false`|Allow cache to be populated. Cache will NOT be populated unless this is set.|`false`|
|`druid.historical.cache.unCacheable`|All druid query types|Do not attempt to cache queries whose types are in this array|`["groupBy","select"]`|
|`druid.historical.cache.numBackgroundThreads`|Non-negative integer|Number of background threads in the thread pool used for eventual-consistency caching of results when caching is enabled. It is recommended to set this value greater than or equal to the number of processing threads. To force caching to execute in the same thread as the query (query results are blocked on caching completion), use a thread count of 0. Deployments that query Druid programmatically with sub-second re-querying should consider setting this to 0 to prevent eventual consistency from degrading overall performance; if that describes your setup, experiment to find the setting that works best.|`0`|
|`druid.historical.cache.useCache`|Enable the cache on the broker.|false|
|`druid.historical.cache.populateCache`|Populate the cache on the broker.|false|
|`druid.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|`druid.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
#### Local Cache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.cache.sizeInBytes`|Maximum cache size in bytes. Zero disables caching.|0|
|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000|
|`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0|
#### Memcache
|Property|Description|Default|
|--------|-----------|-------|
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Comma-separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
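For example, to turn on query caching backed by memcached on a historical node, the configs above can be combined roughly as follows; the memcached hosts are placeholders:

```
druid.historical.cache.useCache=true
druid.historical.cache.populateCache=true
druid.cache.type=memcached
# Placeholder hosts
druid.cache.hosts=#{MEMCACHED_HOST_1}:11211,#{MEMCACHED_HOST_2}:11211
druid.cache.memcachedPrefix=druid
```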

View File

@ -3,9 +3,54 @@ layout: doc_page
---
For general Indexing Service information, see [here](Indexing-Service.html).
#### Runtime Configuration
## Runtime Configuration
In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord has the following basic configs:
The indexing service uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
### Must be set on Overlord and Middle Manager
#### Node Configs
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
#### Task Logging
If you are running the indexing service in remote mode, the task logs must be stored in S3 or HDFS.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.type`|Choices: noop, s3, hdfs, file. Where to store task logs.|file|
##### File Task Logs
Store task logs in the local filesystem.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|Local filesystem path.|log|
##### S3 Task Logs
Store task logs in S3.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.s3Bucket`|S3 bucket name.|none|
|`druid.indexer.logs.s3Prefix`|S3 key prefix.|none|
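For instance, an indexing service running in remote mode could push task logs to S3 with a config along these lines; the bucket and prefix are placeholders:

```
druid.indexer.logs.type=s3
# Placeholder bucket and prefix
druid.indexer.logs.s3Bucket=#{S3_LOG_BUCKET}
druid.indexer.logs.s3Prefix=prod/task-logs
```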
##### HDFS Task Logs
Store task logs in HDFS.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.logs.directory`|The directory to store logs.|none|
### Overlord Configs
|Property|Description|Default|
|--------|-----------|-------|
@ -23,7 +68,7 @@ The following configs only apply if the overlord is running in remote mode:
|--------|-----------|-------|
|`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task has been assigned to a middle manager before throwing an error.|PT5M|
|`druid.indexer.runner.minWorkerVersion`|The minimum middle manager version to send tasks to. |"0"|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the overlord should expect middle managers to compress Znodes.|false|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the overlord should expect middle managers to compress Znodes.|true|
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
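As a sketch, a remote-mode overlord that compresses Znodes and allows a more generous assignment timeout might set the following (the timeout override is illustrative):

```
druid.indexer.runner.compressZnodes=true
druid.indexer.runner.maxZnodeBytes=524288
# Illustrative override of the PT5M default
druid.indexer.runner.taskAssignmentTimeout=PT10M
```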
There are additional configs for autoscaling (if it is enabled):
@ -33,9 +78,9 @@ There are additional configs for autoscaling (if it is enabled):
|`druid.indexer.autoscale.strategy`|Choices are "noop" or "ec2". Sets the strategy to run when autoscaling is required.|noop|
|`druid.indexer.autoscale.doAutoscale`|If set to "true" autoscaling will be enabled.|false|
|`druid.indexer.autoscale.provisionPeriod`|How often to check whether or not new middle managers should be added.|PT1M|
|`druid.indexer.autoscale.terminatePeriod`|How often to check when middle managers should be removed.|PT1H|
|`druid.indexer.autoscale.terminatePeriod`|How often to check when middle managers should be removed.|PT5M|
|`druid.indexer.autoscale.originTime`|The starting reference timestamp that the terminate period increments upon.|2012-01-01T00:55:00.000Z|
|`druid.indexer.autoscale.workerIdleTimeout`|How long can a worker be idle (not running a task) before it can be considered for termination.|PT10M|
|`druid.indexer.autoscale.workerIdleTimeout`|How long can a worker be idle (not running a task) before it can be considered for termination.|PT90M|
|`druid.indexer.autoscale.maxScalingDuration`|How long the overlord will wait around for a middle manager to show up before giving up.|PT15M|
|`druid.indexer.autoscale.numEventsToTrack`|The number of autoscaling related events (node creation and termination) to track.|10|
|`druid.indexer.autoscale.pendingTaskTimeout`|How long a task can be in "pending" state before the overlord tries to scale up.|PT30S|
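For example, turning on EC2 autoscaling while keeping the other values shown above might look like this in the overlord's runtime properties:

```
druid.indexer.autoscale.doAutoscale=true
druid.indexer.autoscale.strategy=ec2
druid.indexer.autoscale.provisionPeriod=PT1M
druid.indexer.autoscale.terminatePeriod=PT5M
druid.indexer.autoscale.workerIdleTimeout=PT90M
```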
@ -44,43 +89,105 @@ There are additional configs for autoscaling (if it is enabled):
#### Dynamic Configuration
Overlord dynamic configuration is mainly for autoscaling. The overlord reads a worker setup spec as a JSON object from the Druid [metadata storage](Metadata-storage.html) config table. This object contains information about the version of middle managers to create, the maximum and minimum number of middle managers in the cluster at one time, and additional information required to automatically create middle managers.
The overlord can dynamically change worker behavior.
The JSON object can be submitted to the overlord via a POST request at:
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker
```
A sample worker setup spec is shown below:
A sample worker config spec is shown below:
```json
{
"minVersion":"some_version",
"minNumWorkers":"0",
"maxNumWorkers":"10",
"nodeData": {
"type":"ec2",
"amiId":"ami-someId",
"instanceType":"m1.xlarge",
"minInstances":"1",
"maxInstances":"1",
"securityGroupIds":["securityGroupIds"],
"keyName":"keyName"
"selectStrategy": {
"type": "fillCapacityWithAffinity",
"affinityConfig": {
"affinity": {
"datasource1": ["ip1:port", "ip2:port"],
"datasource2": ["ip3:port"]
}
}
},
"userData":{
"impl":"string",
"data":"version=:VERSION:",
"versionReplacementString":":VERSION:"
"autoScaler": {
"type": "ec2",
"minNumWorkers": 2,
"maxNumWorkers": 12,
"envConfig": {
"availabilityZone": "us-east-1a",
"nodeData": {
"amiId": "${AMI}",
"instanceType": "c3.8xlarge",
"minInstances": 1,
"maxInstances": 1,
"securityGroupIds": ["${IDs}"],
"keyName": ${KEY_NAME}
},
"userData": {
"impl": "string",
"data": "${SCRIPT_COMMAND}",
"versionReplacementString": ":VERSION:",
"version": null
}
}
}
}
```
Issuing a GET request at the same URL will return the current worker setup spec that is currently in place. The worker setup spec list above is just a sample and it is possible to extend the code base for other deployment environments. A description of the worker setup spec is shown below.
Issuing a GET request at the same URL will return the worker config spec that is currently in place. The worker config spec above is just a sample for EC2, and it is possible to extend the code base for other deployment environments. A description of the worker config spec is shown below.
|Property|Description|Default|
|--------|-----------|-------|
|`selectStrategy`|How to assign tasks to middle managers. Choices are `fillCapacity` and `fillCapacityWithAffinity`.|fillCapacity|
|`autoScaler`|Only used if autoscaling is enabled. See below.|null|
#### Worker Select Strategy
##### Fill Capacity
Workers are assigned tasks until they reach capacity.
|Property|Description|Default|
|--------|-----------|-------|
|`type`|`fillCapacity`.|fillCapacity|
##### Fill Capacity With Affinity
An affinity config can be provided.
|Property|Description|Default|
|--------|-----------|-------|
|`type`|`fillCapacityWithAffinity`.|fillCapacityWithAffinity|
|`affinity`|A map of String datasource names to lists of String `host:port` values.|{}|
Tasks are preferentially assigned to the workers listed for their datasource. The fill capacity strategy is used if no preference is specified for a datasource.
#### Autoscaler
Amazon's EC2 is currently the only supported autoscaler.
|Property|Description|Default|
|--------|-----------|-------|
|`minNumWorkers`|The minimum number of workers that can be in the cluster at any given time.|0|
|`maxNumWorkers`|The maximum number of workers that can be in the cluster at any given time.|0|
|`nodeData`|A JSON object that describes how to launch new nodes. Currently, only EC2 is supported.|none; required|
|`userData`|A JSON object that describes how to configure new nodes. Currently, only EC2 is supported. If you have set druid.indexer.autoscale.workerVersion, this must have a versionReplacementString. Otherwise, a versionReplacementString is not necessary.|none; optional|
|`availabilityZone`|What availability zone to run in.|none|
|`nodeData`|A JSON object that describes how to launch new nodes.|none; required|
|`userData`|A JSON object that describes how to configure new nodes. If you have set druid.indexer.autoscale.workerVersion, this must have a versionReplacementString. Otherwise, a versionReplacementString is not necessary.|none; optional|
### MiddleManager Configs
Middle managers pass their configurations down to their child peons. The middle manager requires the following configs:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.worker.ip`|The IP of the worker.|localhost|
|`druid.worker.version`|Version identifier for the middle manager.|0|
|`druid.worker.capacity`|Maximum number of tasks the middle manager can accept.|Number of available processors - 1|
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the middle managers should compress Znodes.|false|
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
|`druid.indexer.runner.javaCommand`|Command required to execute java.|java|
|`druid.indexer.runner.javaOpts`|-X Java options to run the peon in its own JVM.|""|
|`druid.indexer.runner.classpath`|Java classpath for the peon.|System.getProperty("java.class.path")|
|`druid.indexer.runner.startPort`|The port that peons begin running on.|8081|
|`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|"com.metamx", "druid", "io.druid", "user.timezone","file.encoding"|
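A minimal middle manager runtime.properties, assuming an 8-core worker machine, could then look roughly like this; the IP and peon heap size are placeholders:

```
# Placeholder IP and illustrative peon heap size
druid.worker.ip=#{IP_ADDR}
druid.worker.version=0
druid.worker.capacity=7
druid.indexer.runner.javaOpts=-server -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8
druid.indexer.runner.startPort=8081
```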

View File

@ -12,14 +12,14 @@ Depending on what `druid.storage.type` is set to, Druid will upload segments to
## My realtime node is not handing segments off
Make sure that the `druid.publish.type` on your real-time nodes is set to `metadata`. Also make sure that `druid.storage.type` is set to a deep storage that makes sense. Some example configs:
Make sure that the `druid.publish.type` on your real-time nodes is set to "metadata". Also make sure that `druid.storage.type` is set to a deep storage that makes sense. Some example configs:
```
druid.publish.type=metadata
druid.publish.type=db
druid.metadata.storage.connector.connectURI=jdbc\:metadata storage\://localhost\:3306/druid
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
druid.storage.type=s3
druid.storage.bucket=druid

View File

@ -13,7 +13,7 @@ The following can be configured on the plumber:
* `windowPeriod` is the amount of lag time to allow events. For example, with a 10 minute window, any event more than 10 minutes old will be thrown away and not included in the segment generated by the realtime server.
* `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.
* `maxPendingPersists` is how many persists a plumber can do concurrently without starting to block.
* `maxPendingPersists` is the maximum number of persists that can be pending, but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes.
* `segmentGranularity` specifies the granularity of the segment, or the amount of time a segment will represent.
* `rejectionPolicy` sets the data acceptance policy for creating and handing off segments. The following policies are available:
* `serverTime` &ndash; The recommended policy for "current time" data, it is optimal for current data that is generated and ingested in real time. Uses `windowPeriod` to accept only those events that are inside the window looking forward and back.

View File

@ -2,118 +2,62 @@
layout: doc_page
---
Realtime Node Configuration
===========================
For general Real-time Node information, see [here](Realtime.html).
==============================
For general Realtime Node information, see [here](Realtime.html).
For Real-time Ingestion, see [Realtime Ingestion](Realtime-ingestion.html).
Runtime Configuration
---------------------
Quick Start
-----------
Run:
The realtime node uses several of the global configs in [Configuration](Configuration.html) and has the following set of configurations as well:
```
io.druid.cli.Main server realtime
```
### Node Config
With the following JVM configuration:
|Property|Description|Default|
|--------|-----------|-------|
|`druid.host`|The host for the current node. This is used to advertise the current processes location as reachable from another node and should generally be specified such that `http://${druid.host}/` could actually talk to this process|none|
|`druid.port`|This is the port to actually listen on; unless port mapping is used, this will be the same port as is on `druid.host`|none|
|`druid.service`|The name of the service. This is used as a dimension when emitting metrics and alerts to differentiate between the various services|none|
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
### Realtime Operation
druid.host=localhost
druid.service=realtime
druid.port=8083
|Property|Description|Default|
|--------|-----------|-------|
|`druid.publish.type`|Where to publish segments. Choices are "noop" or "metadata".|metadata|
|`druid.realtime.specFile`|File location of realtime specFile.|none|
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.160"]
### Storing Intermediate Segments
|Property|Description|Default|
|--------|-----------|-------|
|`druid.segmentCache.locations`|Where intermediate segments are stored. The maxSize should always be zero.|none|
druid.zk.service.host=localhost
### Query Configs
# The realtime config file.
druid.realtime.specFile=/path/to/specFile
#### Processing
# Choices: metadata (hand off segments), noop (do not hand off segments).
druid.publish.type=metadata
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed.|1073741824 (1GB)|
|`druid.processing.formatString`|Realtime and historical nodes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|
druid.metadata.storage.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
#### General Query Configuration
druid.processing.buffer.sizeBytes=100000000
```
##### GroupBy Query Config
Production Configs
------------------
These production configs are using S3 as a deep store.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.groupBy.singleThreaded`|Run single threaded group By queries.|false|
|`druid.query.groupBy.maxIntermediateRows`|Maximum number of intermediate rows.|50000|
|`druid.query.groupBy.maxResults`|Maximum number of results.|500000|
JVM settings:
##### Search Query Config
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
|Property|Description|Default|
|--------|-----------|-------|
|`druid.query.search.maxSearchLimit`|Maximum number of search results to return.|1000|
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/realtime
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.160","io.druid.extensions:druid-kafka-seven:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.metadata.storage.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.metadata.storage.connector.user=#{MYSQL_USER}
druid.metadata.storage.connector.password=#{MYSQL_PW}
druid.metadata.storage.connector.useValidationQuery=true
druid.metadata.storage.tables.base=prod
druid.publish.type=metadata
druid.processing.numThreads=3
druid.request.logging.type=file
druid.request.logging.dir=request_logs/
druid.realtime.specFile=conf/schemas.json
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
druid.storage.type=s3
druid.storage.bucket=#{S3_STORAGE_BUCKET}
druid.storage.baseKey=prod-realtime/v1
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "io.druid.segment.realtime.RealtimeMetricsMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
The realtime module also uses several of the default modules in [Configuration](Configuration.html). For more information on the realtime spec file (or configuration file), see [realtime ingestion](Realtime-ingestion.html) page.

View File

@ -134,13 +134,13 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
|Field|Type|Description|Required|
|-----|----|-----------|--------|
|type|String|This should always be 'realtime'.|no|
|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size.|no (default == 5 million)|
|maxRowsInMemory|Integer|The number of rows to aggregate before persisting. This number is the post-aggregation rows, so it is not equivalent to the number of input events, but the number of aggregated rows that those events result in. This is used to manage the required JVM heap size. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 5 million)|
|windowPeriod|ISO 8601 Period String|The amount of lag time to allow events. With the default 10 minute window, any event more than 10 minutes old will be thrown away and not included in the segment generated by the realtime server.|no (default == PT10m)|
|intermediatePersistPeriod|ISO8601 Period String|The period that determines the rate at which intermediate persists occur. These persists determine how often commits happen against the incoming realtime stream. If the realtime data loading process is interrupted at time T, it should be restarted to re-read data that arrived at T minus this period.|no (default == PT10m)|
|basePersistDirectory|String|The directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.|no (default == java tmp dir)|
|versioningPolicy|Object|How to version segments.|no (default == based on segment start time)|
|rejectionPolicy|Object|Controls the data acceptance policy for creating and handing off segments. More on this below.|no (default=='serverTime')|
|maxPendingPersists|Integer|How many persists a plumber can do concurrently without starting to block.|no (default == 0)|
|maxPendingPersists|Integer|Maximum number of persists that can be pending, but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|no (default == 0; meaning one persist can be running concurrently with ingestion, and none can be queued up)|
|shardSpec|Object|This describes the shard that is represented by this server. This must be specified properly in order to have multiple realtime nodes indexing the same data stream in a sharded fashion.|no (default == 'NoneShardSpec')|
#### Rejection Policy

View File

@ -37,7 +37,7 @@ Extending the code
Realtime integration is intended to be extended in two ways:
1. Connect to data streams from varied systems ([Firehose](https://github.com/druid-io/druid-api/blob/master/src/main/java/io/druid/data/input/FirehoseFactory.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/metamx/druid/blob/master/server/src/main/java/io/druid/segment/realtime/plumber/PlumberSchool.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/druid-io/druid/blob/master/server/src/main/java/io/druid/segment/realtime/plumber/PlumberSchool.java))
The expectations are that the former will be very common and something that users of Druid will do on a fairly regular basis. Most users will probably never have to deal with the latter form of customization. Indeed, we hope that all potential use cases can be packaged up as part of Druid proper without requiring proprietary customization.

View File

@ -16,20 +16,20 @@ Druid is not perfect in how it handles mix-cased dimension and metric names. Thi
# SSDs
SSDs are highly recommended for historical and real-time nodes if you are not running a cluster that is entirely in memory. SSDs can greatly mitigate the time required to page data in and out of memory.
# Provide Columns Names in Lexicographic Order
Although Druid supports schema-less ingestion of dimensions, because of [https://github.com/metamx/druid/issues/658](https://github.com/metamx/druid/issues/658), you may sometimes get bigger segments than necessary. To ensure segments are as compact as possible, providing dimension names in lexicographic order is recommended.
Although Druid supports schema-less ingestion of dimensions, because of [https://github.com/druid-io/druid/issues/658](https://github.com/druid-io/druid/issues/658), you may sometimes get bigger segments than necessary. To ensure segments are as compact as possible, providing dimension names in lexicographic order is recommended.
# Use Timeseries and TopN Queries Instead of GroupBy Where Possible
Timeseries and TopN queries are much more optimized and significantly faster than groupBy queries for their designed use cases. Issuing multiple topN or timeseries queries from your application can potentially be more efficient than a single groupBy query.
Timeseries and TopN queries are much more optimized and significantly faster than groupBy queries for their designed use cases. Issuing multiple topN or timeseries queries from your application can potentially be more efficient than a single groupBy query.
# Read FAQs
You should read common problems people have here:
1) [Ingestion-FAQ](Ingestion-FAQ.html)
2) [Performance-FAQ](Performance-FAQ.html)
2) [Performance-FAQ](Performance-FAQ.html)

View File

@ -110,7 +110,7 @@ This just walks through getting the relevant software installed and running. Yo
1. Clone the git repository for druid, check out a "stable" tag, and build
git clone https://github.com/metamx/druid.git druid
git clone https://github.com/druid-io/druid.git druid
pushd druid
git checkout druid-0.4.12
export LANGUAGE=C

View File

@ -1,4 +1,31 @@
{
"queryType": "timeBoundary",
"dataSource": "twitterstream"
}
"description": "Simple data split up by hour",
"aggregations": [
{
"name": "tweets",
"type": "longSum",
"fieldName": "tweets"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htag_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
}
],
"dataSource": "twitterstream",
"granularity": "hour",
"intervals": [
"1970-01-01T00:00:00.000/2019-01-03T00:00:00.000"
],
"queryType": "timeseries"
}

View File

@ -0,0 +1,94 @@
{
"description": "Top 10 languages by count of tweets in the contiguous US",
"aggregations": [
{
"fieldName": "tweets",
"name": "tweets",
"type": "longSum"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors_hll",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "min_follower_count",
"name": "min_follower_count",
"type": "min"
},
{
"fieldName": "max_follower_count",
"name": "max_follower_count",
"type": "max"
},
{
"fieldName": "min_friends_count",
"name": "min_friends_count",
"type": "min"
},
{
"fieldName": "max_friends_count",
"name": "max_friends_count",
"type": "max"
},
{
"fieldName": "min_statuses_count",
"name": "min_statuses_count",
"type": "min"
},
{
"fieldName": "max_statuses_count",
"name": "max_statuses_count",
"type": "max"
},
{
"fieldName": "min_retweet_count",
"name": "min_retweet_count",
"type": "min"
},
{
"fieldName": "max_retweet_count",
"name": "max_retweet_count",
"type": "max"
}
],
"dataSource": "twitterstream",
"dimension": "lang",
"filter": {
"bound": {
"maxCoords": [
50,
-65
],
"minCoords": [
25,
-127
],
"type": "rectangular"
},
"dimension": "geo",
"type": "spatial"
},
"granularity": "all",
"intervals": [
"2013-06-01T00:00/2020-01-01T00"
],
"metric": "tweets",
"queryType": "topN",
"threshold": "10"
}

View File

@ -1,119 +1,151 @@
[
{
{
"description": "Ingestion spec for Twitter spritzer. Dimension values taken from io.druid.examples.twitter.TwitterSpritzerFirehoseFactory",
"spec": {
"dataSchema": {
"dataSource": "twitterstream",
"parser": {
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "utcdt",
"format": "iso"
},
"dimensionsSpec": {
"dimensions": [
],
"dimensionExclusions": [
],
"spatialDimensions": [
]
}
}
"granularitySpec": {
"queryGranularity": "all",
"segmentGranularity": "hour",
"type": "uniform"
},
"metricsSpec": [
{
"type": "count",
"name": "tweets"
"name": "tweets",
"type": "count"
},
{
"type": "doubleSum",
"fieldName": "follower_count",
"name": "total_follower_count"
"name": "total_follower_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "retweet_count",
"name": "total_retweet_count"
"name": "total_retweet_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "friends_count",
"name": "total_friends_count"
"name": "total_friends_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "statuses_count",
"name": "total_statuses_count"
"name": "total_statuses_count",
"type": "doubleSum"
},
{
"fieldName": "text",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"type": "min",
"fieldName": "follower_count",
"name": "min_follower_count"
"name": "min_follower_count",
"type": "min"
},
{
"type": "max",
"fieldName": "follower_count",
"name": "max_follower_count"
"name": "max_follower_count",
"type": "max"
},
{
"type": "min",
"fieldName": "friends_count",
"name": "min_friends_count"
"name": "min_friends_count",
"type": "min"
},
{
"type": "max",
"fieldName": "friends_count",
"name": "max_friends_count"
"name": "max_friends_count",
"type": "max"
},
{
"type": "min",
"fieldName": "statuses_count",
"name": "min_statuses_count"
"name": "min_statuses_count",
"type": "min"
},
{
"type": "max",
"fieldName": "statuses_count",
"name": "max_statuses_count"
"name": "max_statuses_count",
"type": "max"
},
{
"type": "min",
"fieldName": "retweet_count",
"name": "min_retweet_count"
"name": "min_retweet_count",
"type": "min"
},
{
"type": "max",
"fieldName": "retweet_count",
"name": "max_retweet_count"
"name": "max_retweet_count",
"type": "max"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": "NONE"
"parser": {
"parseSpec": {
"dimensionsSpec": {
"dimensions": [
"text",
"htags",
"contributors",
"lat",
"lon",
"retweet_count",
"follower_count",
"friendscount",
"lang",
"utc_offset",
"statuses_count",
"user_id",
"ts"
],
"dimensionExclusions": [
],
"spatialDimensions": [
{
"dimName": "geo",
"dims": [
"lat",
"lon"
]
}
]
},
"format": "json",
"timestampSpec": {
"column": "ts",
"format": "millis"
}
}
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "twitzer",
"maxEventCount": 500000,
"maxRunMinutes": 120
"maxRunMinutes": 120,
"type": "twitzer"
},
"plumber": {
"type": "realtime"
}
"type": "realtime"
},
"tuningConfig": {
"type": "realtime",
"intermediatePersistPeriod": "PT10m",
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT2m",
"windowPeriod": "PT3m",
"basePersistDirectory": "\/tmp\/realtime\/basePersist",
"rejectionPolicy": {
"type": "messageTime"
}
"type": "realtime",
"windowPeriod": "PT10m"
}
}
]
},
"type": "index_realtime"
}

View File

@ -1,6 +1,6 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@ -22,6 +22,8 @@ package io.druid.examples.twitter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.metamx.common.logger.Logger;
import io.druid.data.input.Firehose;
@ -30,6 +32,7 @@ import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.InputRowParser;
import twitter4j.ConnectionLifeCycleListener;
import twitter4j.GeoLocation;
import twitter4j.HashtagEntity;
import twitter4j.StallWarning;
import twitter4j.Status;
@ -39,12 +42,13 @@ import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.User;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
@ -58,25 +62,27 @@ import static java.lang.Thread.sleep;
* with timestamps along with ??.
* The generated tuples have the form (timestamp, ????)
* where the timestamp is from the twitter event.
*
* <p/>
* Example spec file:
*
* <p/>
* Example query using POST to /druid/v2/?w (where w is an arbitrary parameter and the date and time
* is UTC):
*
* <p/>
* Notes on twitter.com HTTP (REST) API: v1.0 will be disabled around 2013-03 so v1.1 should be used;
* twitter4j 3.0 (not yet released) will support the v1.1 api.
* Specifically, we should be using https://stream.twitter.com/1.1/statuses/sample.json
* See: http://jira.twitter4j.org/browse/TFJ-186
* <p/>
* Notes on JSON parsing: as of twitter4j 2.2.x, the json parser has some bugs (ex: Status.toString()
* can have number format exceptions), so it might be necessary to extract raw json and process it
* separately. If so, set twitter4.jsonStoreEnabled=true and look at DataObjectFactory#getRawJSON();
* com.fasterxml.jackson.databind.ObjectMapper should be used to parse.
*
* Notes on JSON parsing: as of twitter4j 2.2.x, the json parser has some bugs (ex: Status.toString()
* can have number format exceptions), so it might be necessary to extract raw json and process it
* separately. If so, set twitter4.jsonStoreEnabled=true and look at DataObjectFactory#getRawJSON();
* com.fasterxml.jackson.databind.ObjectMapper should be used to parse.
* @author pbaclace
*/
@JsonTypeName("twitzer")
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowParser> {
public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowParser>
{
private static final Logger log = new Logger(TwitterSpritzerFirehoseFactory.class);
/**
* max events to receive, -1 is infinite, 0 means nothing is delivered; use this to prevent
@ -107,7 +113,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
@Override
public Firehose connect(InputRowParser parser) throws IOException
{
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener()
{
@Override
public void onConnect()
{
@ -135,19 +142,15 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
final int QUEUE_SIZE = 2000;
/** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
final LinkedList<String> dimensions = new LinkedList<String>();
final long startMsec = System.currentTimeMillis();
dimensions.add("htags");
dimensions.add("lang");
dimensions.add("utc_offset");
//
// set up Twitter Spritzer
//
twitterStream = new TwitterStreamFactory().getInstance();
twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j
statusListener = new StatusListener()
{ // This is what really gets called to deliver stuff from twitter4j
@Override
public void onStatus(Status status)
{
@ -160,7 +163,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
if (!success) {
log.warn("queue too slow!");
}
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
}
@ -192,7 +196,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
}
@Override
public void onStallWarning(StallWarning warning) {
public void onStallWarning(StallWarning warning)
{
System.out.println("Got stall warning:" + warning);
}
};
@ -201,31 +206,33 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
twitterStream.sample(); // creates a generic StatusStream
log.info("returned from sample()");
return new Firehose() {
return new Firehose()
{
private final Runnable doNothingRunnable = new Runnable() {
private final Runnable doNothingRunnable = new Runnable()
{
public void run()
{
}
};
private long rowCount = 0L;
private boolean waitIfmax = (maxEventCount < 0L);
private final Map<String, Object> theMap = new HashMap<String, Object>(2);
private boolean waitIfmax = (getMaxEventCount() < 0L);
private final Map<String, Object> theMap = new TreeMap<>();
// DIY json parsing // private final ObjectMapper omapper = new ObjectMapper();
private boolean maxTimeReached()
{
if (maxRunMinutes <= 0) {
if (getMaxRunMinutes() <= 0) {
return false;
} else {
return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes;
return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes();
}
}
private boolean maxCountReached()
{
return maxEventCount >= 0 && rowCount >= maxEventCount;
return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount();
}
@Override
@ -253,7 +260,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
try {
log.info("reached limit, sleeping a long time...");
sleep(2000000000L);
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
} else {
@ -267,30 +275,60 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
Status status;
try {
status = queue.take();
} catch (InterruptedException e) {
}
catch (InterruptedException e) {
throw new RuntimeException("InterruptedException", e);
}
HashtagEntity[] hts = status.getHashtagEntities();
if (hts != null && hts.length > 0) {
List<String> hashTags = Lists.newArrayListWithExpectedSize(hts.length);
for (HashtagEntity ht : hts) {
hashTags.add(ht.getText());
}
String text = status.getText();
theMap.put("text", (null == text) ? "" : text);
theMap.put(
"htags", (hts.length > 0) ? Lists.transform(
Arrays.asList(hts), new Function<HashtagEntity, String>()
{
@Nullable
@Override
public String apply(HashtagEntity input)
{
return input.getText();
}
}
) : ImmutableList.<String>of()
);
theMap.put("htags", Arrays.asList(hashTags.get(0)));
long[] lcontrobutors = status.getContributors();
List<String> contributors = new ArrayList<>();
for (long contrib : lcontrobutors) {
contributors.add(String.format("%d", contrib));
}
theMap.put("contributors", contributors);
GeoLocation geoLocation = status.getGeoLocation();
if (null != geoLocation) {
double lat = status.getGeoLocation().getLatitude();
double lon = status.getGeoLocation().getLongitude();
theMap.put("lat", lat);
theMap.put("lon", lon);
} else {
theMap.put("lat", null);
theMap.put("lon", null);
}
long retweetCount = status.getRetweetCount();
theMap.put("retweet_count", retweetCount);
User user = status.getUser();
if (user != null) {
theMap.put("follower_count", user.getFollowersCount());
theMap.put("friends_count", user.getFriendsCount());
theMap.put("lang", user.getLang());
theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available?
theMap.put("statuses_count", user.getStatusesCount());
}
final boolean hasUser = (null != user);
theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0);
theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0);
theMap.put("lang", hasUser ? user.getLang() : "");
theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available?
theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0);
theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : "");
theMap.put("ts",status.getCreatedAt().getTime());
List<String> dimensions = Lists.newArrayList(theMap.keySet());
return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
}
@ -311,4 +349,15 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory<InputRowP
};
}
@JsonProperty
public int getMaxEventCount()
{
return maxEventCount;
}
@JsonProperty
public int getMaxRunMinutes()
{
return maxRunMinutes;
}
}

View File

@ -56,7 +56,7 @@ public class HdfsTaskLogs implements TaskLogs
final Path path = getTaskLogFileFromId(taskId);
log.info("Writing task log to: %s", path);
Configuration conf = new Configuration();
final FileSystem fs = FileSystem.get(conf);
final FileSystem fs = path.getFileSystem(conf);
FileUtil.copy(logFile, fs, path, false, conf);
log.info("Wrote task log to: %s", path);
}
@ -65,7 +65,7 @@ public class HdfsTaskLogs implements TaskLogs
public Optional<ByteSource> streamTaskLog(final String taskId, final long offset) throws IOException
{
final Path path = getTaskLogFileFromId(taskId);
final FileSystem fs = FileSystem.get(new Configuration());
final FileSystem fs = path.getFileSystem(new Configuration());
if (fs.exists(path)) {
return Optional.<ByteSource>of(
new ByteSource()

View File

@ -18,6 +18,7 @@
*/
package io.druid.storage.hdfs.tasklog;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import javax.validation.constraints.NotNull;
@ -27,12 +28,12 @@ import javax.validation.constraints.NotNull;
*/
public class HdfsTaskLogsConfig
{
@JsonProperty
@NotNull
private String directory;
public HdfsTaskLogsConfig(String directory)
@JsonCreator
public HdfsTaskLogsConfig(@JsonProperty("directory") String directory)
{
this.directory = directory;
}

View File

@ -20,88 +20,93 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>
<name>druid-s3-extensions</name>
<description>druid-s3-extensions</description>
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>
<name>druid-s3-extensions</name>
<description>druid-s3-extensions</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<!-- override jets3t from hadoop-core -->
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
</dependency>
<!-- override httpclient / httpcore version from jets3t -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>emitter</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-api</artifactId>
</dependency>
<!-- override jets3t from hadoop-core -->
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
</dependency>
<!-- override httpclient / httpcore version from jets3t -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>com.metamx</groupId>
<artifactId>emitter</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -19,7 +19,13 @@
package io.druid.storage.s3;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.auth.SystemPropertiesCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.fasterxml.jackson.databind.Module;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
@ -62,14 +68,18 @@ public class S3StorageDruidModule implements DruidModule
binder.bind(S3TaskLogs.class).in(LazySingleton.class);
}
@Provides
@LazySingleton
public AWSCredentialsProvider getAWSCredentialsProvider(final AWSCredentialsConfig config)
private static class ConfigDrivenAwsCredentialsConfigProvider implements AWSCredentialsProvider
{
if (!Strings.isNullOrEmpty(config.getAccessKey()) && !Strings.isNullOrEmpty(config.getSecretKey())) {
return new AWSCredentialsProvider() {
@Override
public com.amazonaws.auth.AWSCredentials getCredentials() {
private AWSCredentialsConfig config;
public ConfigDrivenAwsCredentialsConfigProvider(AWSCredentialsConfig config) {
this.config = config;
}
@Override
public com.amazonaws.auth.AWSCredentials getCredentials()
{
if (!Strings.isNullOrEmpty(config.getAccessKey()) && !Strings.isNullOrEmpty(config.getSecretKey())) {
return new com.amazonaws.auth.AWSCredentials() {
@Override
public String getAWSAccessKeyId() {
@ -82,13 +92,56 @@ public class S3StorageDruidModule implements DruidModule
}
};
}
@Override
public void refresh() {}
};
} else {
return new FileSessionCredentialsProvider(config.getFileSessionCredentials());
throw new AmazonClientException("Unable to load AWS credentials from druid AWSCredentialsConfig");
}
@Override
public void refresh() {}
}
private static class LazyFileSessionCredentialsProvider implements AWSCredentialsProvider
{
private AWSCredentialsConfig config;
private FileSessionCredentialsProvider provider;
public LazyFileSessionCredentialsProvider(AWSCredentialsConfig config) {
this.config = config;
}
private FileSessionCredentialsProvider getUnderlyingProvider() {
if (provider == null) {
synchronized (config) {
if (provider == null) {
provider = new FileSessionCredentialsProvider(config.getFileSessionCredentials());
}
}
}
return provider;
}
@Override
public com.amazonaws.auth.AWSCredentials getCredentials()
{
return getUnderlyingProvider().getCredentials();
}
@Override
public void refresh() {
getUnderlyingProvider().refresh();
}
}
@Provides
@LazySingleton
public AWSCredentialsProvider getAWSCredentialsProvider(final AWSCredentialsConfig config)
{
return new AWSCredentialsProviderChain(
new ConfigDrivenAwsCredentialsConfigProvider(config),
new LazyFileSessionCredentialsProvider(config),
new EnvironmentVariableCredentialsProvider(),
new SystemPropertiesCredentialsProvider(),
new ProfileCredentialsProvider(),
new InstanceProfileCredentialsProvider());
}
@Provides

View File

@ -67,6 +67,7 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
@ -83,10 +84,15 @@ public class HadoopDruidIndexerConfig
public static final Joiner tabJoiner = Joiner.on("\t");
public static final ObjectMapper jsonMapper;
// workaround to pass down druid.processing.bitmap.type, see IndexGeneratorJob.run()
protected static final Properties properties;
private static final String DEFAULT_WORKING_PATH = "/tmp/druid-indexing";
static {
injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Object>of(
ImmutableList.<Module>of(
new Module()
{
@Override
@ -100,6 +106,7 @@ public class HadoopDruidIndexerConfig
)
);
jsonMapper = injector.getInstance(ObjectMapper.class);
properties = injector.getInstance(Properties.class);
}
public static enum IndexJobCounters
@ -116,8 +123,8 @@ public class HadoopDruidIndexerConfig
{
// Eventually PathSpec needs to get rid of its Hadoop dependency, then maybe this can be ingested directly without
// the Map<> intermediary
if(argSpec.containsKey("spec")){
if (argSpec.containsKey("spec")) {
return HadoopDruidIndexerConfig.jsonMapper.convertValue(
argSpec,
HadoopDruidIndexerConfig.class
@ -138,8 +145,8 @@ public class HadoopDruidIndexerConfig
return fromMap(
(Map<String, Object>) HadoopDruidIndexerConfig.jsonMapper.readValue(
file, new TypeReference<Map<String, Object>>()
{
}
{
}
)
);
}
@ -175,7 +182,7 @@ public class HadoopDruidIndexerConfig
private volatile HadoopIngestionSpec schema;
private volatile PathSpec pathSpec;
private volatile Map<DateTime,ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
private volatile Map<DateTime, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
private volatile Map<ShardSpec, HadoopyShardSpec> hadoopShardSpecLookup = Maps.newHashMap();
private final QueryGranularity rollupGran;
@ -193,17 +200,17 @@ public class HadoopDruidIndexerConfig
final ShardSpec actualSpec = entry.getValue().get(0).getActualSpec();
shardSpecLookups.put(
entry.getKey(), actualSpec.getLookup(
Lists.transform(
entry.getValue(), new Function<HadoopyShardSpec, ShardSpec>()
{
@Override
public ShardSpec apply(HadoopyShardSpec input)
{
return input.getActualSpec();
}
}
Lists.transform(
entry.getValue(), new Function<HadoopyShardSpec, ShardSpec>()
{
@Override
public ShardSpec apply(HadoopyShardSpec input)
{
return input.getActualSpec();
}
}
)
)
)
);
for (HadoopyShardSpec hadoopyShardSpec : entry.getValue()) {
hadoopShardSpecLookup.put(hadoopyShardSpec.getActualSpec(), hadoopyShardSpec);
@ -212,7 +219,7 @@ public class HadoopDruidIndexerConfig
this.rollupGran = schema.getDataSchema().getGranularitySpec().getQueryGranularity();
}
@JsonProperty(value="spec")
@JsonProperty(value = "spec")
public HadoopIngestionSpec getSchema()
{
return schema;
@ -333,7 +340,11 @@ public class HadoopDruidIndexerConfig
return Optional.absent();
}
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart()).getShardSpec(rollupGran.truncate(inputRow.getTimestampFromEpoch()), inputRow);
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart())
.getShardSpec(
rollupGran.truncate(inputRow.getTimestampFromEpoch()),
inputRow
);
final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(actualSpec);
return Optional.of(
@ -403,6 +414,12 @@ public class HadoopDruidIndexerConfig
return schema.getTuningConfig().isPersistInHeap();
}
public String getWorkingPath()
{
final String workingPath = schema.getTuningConfig().getWorkingPath();
return workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
}
/******************************************
Path helper logic
******************************************/
@ -418,7 +435,7 @@ public class HadoopDruidIndexerConfig
return new Path(
String.format(
"%s/%s/%s",
schema.getTuningConfig().getWorkingPath(),
getWorkingPath(),
schema.getDataSchema().getDataSource(),
schema.getTuningConfig().getVersion().replace(":", "")
)


@ -77,7 +77,9 @@ public class HadoopDruidIndexerJob implements Jobby
if (metadataStorageUpdaterJob != null) {
jobs.add(metadataStorageUpdaterJob);
} else {
log.info("No updaterJobSpec set, not uploading to database");
log.info(
"No metadataStorageUpdaterJob set in the config. This is cool if you are running a hadoop index task, otherwise nothing will be uploaded to database."
);
}
jobs.add(


@ -36,7 +36,6 @@ import java.util.Map;
@JsonTypeName("hadoop")
public class HadoopTuningConfig implements TuningConfig
{
private static final String DEFAULT_WORKING_PATH = "/tmp/druid-indexing";
private static final PartitionsSpec DEFAULT_PARTITIONS_SPEC = HashedPartitionsSpec.makeDefaultHashedPartitionsSpec();
private static final Map<DateTime, List<HadoopyShardSpec>> DEFAULT_SHARD_SPECS = ImmutableMap.<DateTime, List<HadoopyShardSpec>>of();
private static final int DEFAULT_ROW_FLUSH_BOUNDARY = 80000;
@ -46,7 +45,7 @@ public class HadoopTuningConfig implements TuningConfig
public static HadoopTuningConfig makeDefaultTuningConfig()
{
return new HadoopTuningConfig(
DEFAULT_WORKING_PATH,
null,
new DateTime().toString(),
DEFAULT_PARTITIONS_SPEC,
DEFAULT_SHARD_SPECS,
@ -99,7 +98,7 @@ public class HadoopTuningConfig implements TuningConfig
final @JsonProperty("aggregationBufferRatio") Float aggregationBufferRatio
)
{
this.workingPath = workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
this.workingPath = workingPath;
this.version = version == null ? new DateTime().toString() : version;
this.partitionsSpec = partitionsSpec == null ? DEFAULT_PARTITIONS_SPEC : partitionsSpec;
this.shardSpecs = shardSpecs == null ? DEFAULT_SHARD_SPECS : shardSpecs;


@ -21,6 +21,7 @@ package io.druid.indexer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@ -44,6 +45,8 @@ import io.druid.segment.LoggingProgressIndicator;
import io.druid.segment.ProgressIndicator;
import io.druid.segment.QueryableIndex;
import io.druid.segment.SegmentUtils;
import io.druid.segment.data.BitmapSerde;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.segment.incremental.OffheapIncrementalIndex;
@ -180,6 +183,19 @@ public class IndexGeneratorJob implements Jobby
config.addInputPaths(job);
config.addJobProperties(job);
// hack to get druid.processing.bitmap property passed down to hadoop job.
// once IndexIO doesn't rely on globally injected properties, we can move this into the HadoopTuningConfig.
final String bitmapProperty = "druid.processing.bitmap.type";
final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty(bitmapProperty);
if(bitmapType != null) {
for(String property : new String[] {"mapreduce.reduce.java.opts", "mapreduce.map.java.opts"}) {
// prepend property to allow overriding using hadoop.xxx properties by JobHelper.injectSystemProperties above
String value = Strings.nullToEmpty(job.getConfiguration().get(property));
job.getConfiguration().set(property, String.format("-D%s=%s %s", bitmapProperty, bitmapType, value));
}
}
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);


@ -63,7 +63,7 @@ public class JobHelper
final Configuration conf = groupByJob.getConfiguration();
final FileSystem fs = FileSystem.get(conf);
Path distributedClassPath = new Path(config.getSchema().getTuningConfig().getWorkingPath(), "classpath");
Path distributedClassPath = new Path(config.getWorkingPath(), "classpath");
if (fs instanceof LocalFileSystem) {
return;


@ -22,6 +22,7 @@ package io.druid.guice;
import com.google.inject.Binder;
import io.druid.indexing.overlord.config.ForkingTaskRunnerConfig;
import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.server.initialization.IndexerZkConfig;
/**
*/
@ -31,5 +32,6 @@ public class IndexingServiceModuleHelper
{
JsonConfigProvider.bind(binder, "druid.indexer.runner", ForkingTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, "druid.indexer.runner", RemoteTaskRunnerConfig.class);
JsonConfigProvider.bind(binder, "druid.zk.paths.indexer", IndexerZkConfig.class);
}
}


@ -111,13 +111,11 @@ public class HadoopIndexTask extends AbstractTask
this.spec = spec;
// Some HadoopIngestionSpec stuff doesn't make sense in the context of the indexing service
if (this.spec.getTuningConfig().getWorkingPath() != null) {
log.error("workingPath should be absent in your spec! Ignoring");
}
Preconditions.checkArgument(
this.spec.getIOConfig().getSegmentOutputPath() == null,
"segmentOutputPath must be absent"
);
Preconditions.checkArgument(this.spec.getTuningConfig().getWorkingPath() == null, "workingPath must be absent");
Preconditions.checkArgument(
this.spec.getIOConfig().getMetadataUpdateSpec() == null,
"metadataUpdateSpec must be absent"


@ -32,8 +32,6 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.io.InputSupplier;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
@ -53,6 +51,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.overlord.setup.WorkerSelectStrategy;
import io.druid.indexing.worker.TaskAnnouncement;
import io.druid.indexing.worker.Worker;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import io.druid.tasklogs.TaskLogStreamer;
import org.apache.commons.lang.mutable.MutableInt;
@ -104,7 +103,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
private final ObjectMapper jsonMapper;
private final RemoteTaskRunnerConfig config;
private final ZkPathsConfig zkPaths;
private final IndexerZkConfig indexerZkConfig;
private final CuratorFramework cf;
private final PathChildrenCacheFactory pathChildrenCacheFactory;
private final PathChildrenCache workerPathCache;
@ -131,7 +130,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
public RemoteTaskRunner(
ObjectMapper jsonMapper,
RemoteTaskRunnerConfig config,
ZkPathsConfig zkPaths,
IndexerZkConfig indexerZkConfig,
CuratorFramework cf,
PathChildrenCacheFactory pathChildrenCacheFactory,
HttpClient httpClient,
@ -140,10 +139,10 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
{
this.jsonMapper = jsonMapper;
this.config = config;
this.zkPaths = zkPaths;
this.indexerZkConfig = indexerZkConfig;
this.cf = cf;
this.pathChildrenCacheFactory = pathChildrenCacheFactory;
this.workerPathCache = pathChildrenCacheFactory.make(cf, zkPaths.getIndexerAnnouncementPath());
this.workerPathCache = pathChildrenCacheFactory.make(cf, indexerZkConfig.getAnnouncementsPath());
this.httpClient = httpClient;
this.strategy = strategy;
}
@ -498,7 +497,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
} else {
final String workerId = worker.getHost();
log.info("Cleaning up task[%s] on worker[%s]", taskId, workerId);
final String statusPath = JOINER.join(zkPaths.getIndexerStatusPath(), workerId, taskId);
final String statusPath = JOINER.join(indexerZkConfig.getStatus(), workerId, taskId);
try {
cf.delete().guaranteed().forPath(statusPath);
}
@ -584,7 +583,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
throw new ISE("Length of raw bytes for task too large[%,d > %,d]", rawBytes.length, config.getMaxZnodeBytes());
}
String taskPath = JOINER.join(zkPaths.getIndexerTaskPath(), theWorker.getHost(), task.getId());
String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), theWorker.getHost(), task.getId());
if (cf.checkExists().forPath(taskPath) == null) {
cf.create()
@ -644,7 +643,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
log.info("Worker[%s] reportin' for duty!", worker.getHost());
try {
final String workerStatusPath = JOINER.join(zkPaths.getIndexerStatusPath(), worker.getHost());
final String workerStatusPath = JOINER.join(indexerZkConfig.getStatus(), worker.getHost());
final PathChildrenCache statusCache = pathChildrenCacheFactory.make(cf, workerStatusPath);
final SettableFuture<ZkWorker> retVal = SettableFuture.create();
final ZkWorker zkWorker = new ZkWorker(
@ -789,7 +788,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
if (zkWorker != null) {
try {
List<String> tasksToFail = Lists.newArrayList(
cf.getChildren().forPath(JOINER.join(zkPaths.getIndexerTaskPath(), worker.getHost()))
cf.getChildren().forPath(JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost()))
);
log.info("[%s]: Found %d tasks assigned", worker.getHost(), tasksToFail.size());
@ -807,7 +806,7 @@ public class RemoteTaskRunner implements TaskRunner, TaskLogStreamer
for (String assignedTask : tasksToFail) {
RemoteTaskRunnerWorkItem taskRunnerWorkItem = runningTasks.remove(assignedTask);
if (taskRunnerWorkItem != null) {
String taskPath = JOINER.join(zkPaths.getIndexerTaskPath(), worker.getHost(), assignedTask);
String taskPath = JOINER.join(indexerZkConfig.getTasksPath(), worker.getHost(), assignedTask);
if (cf.checkExists().forPath(taskPath) != null) {
cf.delete().guaranteed().forPath(taskPath);
}


@ -29,6 +29,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy;
import io.druid.indexing.overlord.setup.WorkerBehaviorConfig;
import io.druid.indexing.overlord.setup.WorkerSelectStrategy;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
@ -38,7 +39,7 @@ public class RemoteTaskRunnerFactory implements TaskRunnerFactory
{
private final CuratorFramework curator;
private final RemoteTaskRunnerConfig remoteTaskRunnerConfig;
private final ZkPathsConfig zkPaths;
private final IndexerZkConfig zkPaths;
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final WorkerSelectStrategy strategy;
@ -47,7 +48,7 @@ public class RemoteTaskRunnerFactory implements TaskRunnerFactory
public RemoteTaskRunnerFactory(
final CuratorFramework curator,
final RemoteTaskRunnerConfig remoteTaskRunnerConfig,
final ZkPathsConfig zkPaths,
final IndexerZkConfig zkPaths,
final ObjectMapper jsonMapper,
@Global final HttpClient httpClient,
final Supplier<WorkerBehaviorConfig> workerBehaviourConfigSupplier


@ -38,6 +38,7 @@ import io.druid.indexing.overlord.config.TaskQueueConfig;
import io.druid.indexing.overlord.autoscaling.ResourceManagementScheduler;
import io.druid.indexing.overlord.autoscaling.ResourceManagementSchedulerFactory;
import io.druid.server.DruidNode;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
@ -74,7 +75,7 @@ public class TaskMaster
final TaskStorage taskStorage,
final TaskActionClientFactory taskActionClientFactory,
@Self final DruidNode node,
final ZkPathsConfig zkPaths,
final IndexerZkConfig zkPaths,
final TaskRunnerFactory runnerFactory,
final ResourceManagementSchedulerFactory managementSchedulerFactory,
final CuratorFramework curator,
@ -85,7 +86,7 @@ public class TaskMaster
this.taskActionClientFactory = taskActionClientFactory;
this.leaderSelector = new LeaderSelector(
curator,
zkPaths.getIndexerLeaderLatchPath(),
zkPaths.getLeaderLatchPath(),
new LeaderSelectorListener()
{
@Override


@ -19,6 +19,7 @@
package io.druid.indexing.overlord;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
@ -40,13 +41,17 @@ import io.druid.indexing.common.task.Task;
import io.druid.query.NoopQueryRunner;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.query.QueryRunnerFactoryConglomerate;
import io.druid.query.QuerySegmentWalker;
import io.druid.query.SegmentDescriptor;
import io.druid.query.UnionQueryRunner;
import org.apache.commons.io.FileUtils;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListSet;
@ -59,16 +64,18 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
private final TaskToolboxFactory toolboxFactory;
private final ListeningExecutorService exec;
private final Set<ThreadPoolTaskRunnerWorkItem> runningItems = new ConcurrentSkipListSet<>();
private final QueryRunnerFactoryConglomerate conglomerate;
private static final EmittingLogger log = new EmittingLogger(ThreadPoolTaskRunner.class);
@Inject
public ThreadPoolTaskRunner(
TaskToolboxFactory toolboxFactory
TaskToolboxFactory toolboxFactory,
QueryRunnerFactoryConglomerate conglomerate
)
{
this.toolboxFactory = Preconditions.checkNotNull(toolboxFactory, "toolboxFactory");
this.exec = MoreExecutors.listeningDecorator(Execs.singleThreaded("task-runner-%d"));
this.conglomerate = conglomerate;
}
@LifecycleStop
@ -86,19 +93,19 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
runningItems.add(taskRunnerWorkItem);
Futures.addCallback(
statusFuture, new FutureCallback<TaskStatus>()
{
@Override
public void onSuccess(TaskStatus result)
{
runningItems.remove(taskRunnerWorkItem);
}
{
@Override
public void onSuccess(TaskStatus result)
{
runningItems.remove(taskRunnerWorkItem);
}
@Override
public void onFailure(Throwable t)
{
runningItems.remove(taskRunnerWorkItem);
}
}
@Override
public void onFailure(Throwable t)
{
runningItems.remove(taskRunnerWorkItem);
}
}
);
return statusFuture;
@ -150,29 +157,43 @@ public class ThreadPoolTaskRunner implements TaskRunner, QuerySegmentWalker
return getQueryRunnerImpl(query);
}
private <T> QueryRunner<T> getQueryRunnerImpl(Query<T> query)
private <T> QueryRunner<T> getQueryRunnerImpl(final Query<T> query)
{
QueryRunner<T> queryRunner = null;
final String queryDataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
return new UnionQueryRunner<>(
Iterables.transform(
query.getDataSource().getNames(), new Function<String, QueryRunner>()
{
@Override
public QueryRunner apply(String queryDataSource)
{
QueryRunner<T> queryRunner = null;
for (final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem : ImmutableList.copyOf(runningItems)) {
final Task task = taskRunnerWorkItem.getTask();
if (task.getDataSource().equals(queryDataSource)) {
final QueryRunner<T> taskQueryRunner = task.getQueryRunner(query);
for (final ThreadPoolTaskRunnerWorkItem taskRunnerWorkItem : ImmutableList.copyOf(runningItems)) {
final Task task = taskRunnerWorkItem.getTask();
if (task.getDataSource().equals(queryDataSource)) {
final QueryRunner<T> taskQueryRunner = task.getQueryRunner(query);
if (taskQueryRunner != null) {
if (queryRunner == null) {
queryRunner = taskQueryRunner;
} else {
log.makeAlert("Found too many query runners for datasource")
.addData("dataSource", queryDataSource)
.emit();
}
}
}
}
if (taskQueryRunner != null) {
if (queryRunner == null) {
queryRunner = taskQueryRunner;
} else {
log.makeAlert("Found too many query runners for datasource")
.addData("dataSource", queryDataSource)
.emit();
}
}
}
}
if (queryRunner != null) {
return queryRunner;
} else {
return new NoopQueryRunner();
}
}
}
), conglomerate.findFactory(query).getToolchest()
);
return queryRunner == null ? new NoopQueryRunner<T>() : queryRunner;
}
private static class ThreadPoolTaskRunnerWorkItem extends TaskRunnerWorkItem


@ -34,7 +34,7 @@ public class ResourceManagementSchedulerConfig
private Period provisionPeriod = new Period("PT1M");
@JsonProperty
private Period terminatePeriod = new Period("PT1H");
private Period terminatePeriod = new Period("PT5M");
@JsonProperty
private DateTime originTime = new DateTime("2012-01-01T00:55:00.000Z");


@ -27,7 +27,7 @@ import org.joda.time.Period;
public class SimpleResourceManagementConfig
{
@JsonProperty
private Period workerIdleTimeout = new Period("PT10m");
private Period workerIdleTimeout = new Period("PT90m");
@JsonProperty
private Period maxScalingDuration = new Period("PT15M");


@ -412,9 +412,7 @@ public class OverlordResource
try {
final Optional<ByteSource> stream = taskLogStreamer.streamTaskLog(taskid, offset);
if (stream.isPresent()) {
try(InputStream istream = stream.get().openStream()) {
return Response.ok(istream).build();
}
return Response.ok(stream.get().openStream()).build();
} else {
return Response.status(Response.Status.NOT_FOUND)
.entity(


@ -31,7 +31,7 @@ import com.metamx.common.lifecycle.LifecycleStop;
import com.metamx.common.logger.Logger;
import io.druid.curator.announcement.Announcer;
import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.server.initialization.ZkPathsConfig;
import io.druid.server.initialization.IndexerZkConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.zookeeper.CreateMode;
import org.joda.time.DateTime;
@ -63,7 +63,7 @@ public class WorkerCuratorCoordinator
@Inject
public WorkerCuratorCoordinator(
ObjectMapper jsonMapper,
ZkPathsConfig zkPaths,
IndexerZkConfig indexerZkConfig,
RemoteTaskRunnerConfig config,
CuratorFramework curatorFramework,
Worker worker
@ -76,9 +76,9 @@ public class WorkerCuratorCoordinator
this.announcer = new Announcer(curatorFramework, MoreExecutors.sameThreadExecutor());
this.baseAnnouncementsPath = getPath(Arrays.asList(zkPaths.getIndexerAnnouncementPath(), worker.getHost()));
this.baseTaskPath = getPath(Arrays.asList(zkPaths.getIndexerTaskPath(), worker.getHost()));
this.baseStatusPath = getPath(Arrays.asList(zkPaths.getIndexerStatusPath(), worker.getHost()));
this.baseAnnouncementsPath = getPath(Arrays.asList(indexerZkConfig.getAnnouncementsPath(), worker.getHost()));
this.baseTaskPath = getPath(Arrays.asList(indexerZkConfig.getTasksPath(), worker.getHost()));
this.baseStatusPath = getPath(Arrays.asList(indexerZkConfig.getStatus(), worker.getHost()));
}
@LifecycleStart


@ -41,6 +41,7 @@ import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.IOException;
import java.io.InputStream;
/**
@ -167,10 +168,10 @@ public class WorkerResource
final Optional<ByteSource> stream = taskRunner.streamTaskLog(taskid, offset);
if (stream.isPresent()) {
try (InputStream logStream = stream.get().openStream()) {
return Response.ok(logStream).build();
try {
return Response.ok(stream.get().openStream()).build();
}
catch (Exception e) {
catch (IOException e) {
log.warn(e, "Failed to read log for task: %s", taskid);
return Response.serverError().build();
}


@ -0,0 +1,102 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.initialization;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.curator.utils.ZKPaths;
/**
*
*/
public class IndexerZkConfig
{
@JsonCreator
public IndexerZkConfig(
@JacksonInject ZkPathsConfig zkPathsConfig,
@JsonProperty("base") String base,
@JsonProperty("announcementsPath") String announcementsPath,
@JsonProperty("tasksPath") String tasksPath,
@JsonProperty("status") String status,
@JsonProperty("leaderLatchPath") String leaderLatchPath
)
{
this.zkPathsConfig = zkPathsConfig;
this.base = base;
this.announcementsPath = announcementsPath;
this.tasksPath = tasksPath;
this.status = status;
this.leaderLatchPath = leaderLatchPath;
}
@JacksonInject
private final ZkPathsConfig zkPathsConfig;
@JsonProperty
private final String base;
@JsonProperty
private final String announcementsPath;
@JsonProperty
private final String tasksPath;
@JsonProperty
private final String status;
@JsonProperty
private final String leaderLatchPath;
private String defaultIndexerPath(final String subPath)
{
return getZkPathsConfig().defaultPath(ZKPaths.makePath(getBase(), subPath));
}
public String getBase()
{
return base == null ? "indexer" : base;
}
public String getAnnouncementsPath()
{
return announcementsPath == null ? defaultIndexerPath("announcements") : announcementsPath;
}
public String getTasksPath()
{
return tasksPath == null ? defaultIndexerPath("tasks") : tasksPath;
}
public String getStatus()
{
return status == null ? defaultIndexerPath("status") : status;
}
public String getLeaderLatchPath()
{
return leaderLatchPath == null ? defaultIndexerPath("leaderLatchPath") : leaderLatchPath;
}
public ZkPathsConfig getZkPathsConfig()
{
return zkPathsConfig;
}
}


@ -20,6 +20,7 @@
package io.druid.indexing.common.task;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.metamx.common.Granularity;
import io.druid.data.input.impl.CSVParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
@ -57,7 +58,10 @@ public class IndexTaskTest
@Test
public void testDeterminePartitions() throws Exception
{
File tmpFile = File.createTempFile("druid", "index");
File tmpDir = Files.createTempDir();
tmpDir.deleteOnExit();
File tmpFile = File.createTempFile("druid", "index", tmpDir);
tmpFile.deleteOnExit();
PrintWriter writer = new PrintWriter(tmpFile);
@ -97,7 +101,7 @@ public class IndexTaskTest
),
new IndexTask.IndexIOConfig(
new LocalFirehoseFactory(
tmpFile.getParentFile(),
tmpDir,
"druid*",
null
)


@ -43,6 +43,7 @@ import io.druid.indexing.overlord.setup.FillCapacityWorkerSelectStrategy;
import io.druid.indexing.worker.TaskAnnouncement;
import io.druid.indexing.worker.Worker;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
@ -57,6 +58,7 @@ import org.junit.Test;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
public class RemoteTaskRunnerTest
{
@ -66,6 +68,7 @@ public class RemoteTaskRunnerTest
private static final String announcementsPath = String.format("%s/indexer/announcements/worker", basePath);
private static final String tasksPath = String.format("%s/indexer/tasks/worker", basePath);
private static final String statusPath = String.format("%s/indexer/status/worker", basePath);
private static final int TIMEOUT_SECONDS = 5;
private TestingCluster testingCluster;
private CuratorFramework cf;
@ -282,7 +285,7 @@ public class RemoteTaskRunnerTest
cf.delete().forPath(joiner.join(statusPath, task.getId()));
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(status.getStatusCode(), TaskStatus.Status.FAILED);
}
@ -335,7 +338,7 @@ public class RemoteTaskRunnerTest
ListenableFuture<TaskStatus> future = remoteTaskRunner.run(task);
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(TaskStatus.Status.SUCCESS, status.getStatusCode());
}
@ -353,7 +356,7 @@ public class RemoteTaskRunnerTest
cf.delete().forPath(announcementsPath);
TaskStatus status = future.get();
TaskStatus status = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
Assert.assertEquals(TaskStatus.Status.FAILED, status.getStatusCode());
}
@ -393,14 +396,14 @@ public class RemoteTaskRunnerTest
remoteTaskRunner = new RemoteTaskRunner(
jsonMapper,
config,
new ZkPathsConfig()
new IndexerZkConfig(new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
cf,
new SimplePathChildrenCacheFactory.Builder().build(),
null,


@ -334,7 +334,7 @@ public class TaskLifecycleTest
),
new DefaultObjectMapper()
);
tr = new ThreadPoolTaskRunner(tb);
tr = new ThreadPoolTaskRunner(tb, null);
tq = new TaskQueue(tqc, ts, tr, tac, tl, emitter);
tq.start();
}


@ -50,6 +50,7 @@ import io.druid.segment.loading.LocalDataSegmentPuller;
import io.druid.segment.loading.OmniSegmentLoader;
import io.druid.segment.loading.SegmentLoaderConfig;
import io.druid.segment.loading.StorageLocationConfig;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import junit.framework.Assert;
import org.apache.curator.framework.CuratorFramework;
@ -139,14 +140,15 @@ public class WorkerTaskMonitorTest
workerCuratorCoordinator = new WorkerCuratorCoordinator(
jsonMapper,
new IndexerZkConfig(
new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
new TestRemoteTaskRunnerConfig(),
cf,
worker
@ -180,7 +182,8 @@ public class WorkerTaskMonitorTest
}
)
), jsonMapper
)
),
null
),
new WorkerConfig().setCapacity(1)
);


@ -25,6 +25,7 @@ import io.druid.indexing.overlord.config.RemoteTaskRunnerConfig;
import io.druid.indexing.worker.Worker;
import io.druid.indexing.worker.WorkerCuratorCoordinator;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.server.initialization.IndexerZkConfig;
import io.druid.server.initialization.ZkPathsConfig;
import junit.framework.Assert;
import org.apache.curator.framework.CuratorFramework;
@ -76,14 +77,14 @@ public class WorkerResourceTest
curatorCoordinator = new WorkerCuratorCoordinator(
jsonMapper,
new ZkPathsConfig()
new IndexerZkConfig(new ZkPathsConfig()
{
@Override
public String getZkBasePath()
public String getBase()
{
return basePath;
}
},
},null,null,null,null,null),
new RemoteTaskRunnerConfig(),
cf,
worker


@ -0,0 +1,214 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.initialization;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.name.Names;
import io.druid.curator.CuratorConfig;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
import io.druid.guice.JsonConfigurator;
import io.druid.initialization.Initialization;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
/**
*
*/
public class IndexerZkConfigTest
{
private static final String indexerPropertyString = "test.druid.zk.paths.indexer";
private static final String zkServiceConfigString = "test.druid.zk.paths";
private static final Collection<String> clobberableProperties = new HashSet<>();
private static final Module simpleZkConfigModule = new Module()
{
@Override
public void configure(Binder binder)
{
binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/test");
binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
// See IndexingServiceModuleHelper
JsonConfigProvider.bind(binder, indexerPropertyString, IndexerZkConfig.class);
JsonConfigProvider.bind(
binder, zkServiceConfigString,
ZkPathsConfig.class
);
}
};
@BeforeClass
public static void setup()
{
for (Field field : IndexerZkConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
clobberableProperties.add(String.format("%s.%s", indexerPropertyString, field.getName()));
}
}
for (Field field : ZkPathsConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
clobberableProperties.add(String.format("%s.%s", zkServiceConfigString, field.getName()));
}
}
}
private Properties propertyValues = new Properties();
private int assertions = 0;
@Before
public void setupTest()
{
for (String property : clobberableProperties) {
propertyValues.put(property, UUID.randomUUID().toString());
}
assertions = 0;
}
private void validateEntries(ZkPathsConfig zkPathsConfig)
throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
for (Field field : ZkPathsConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
String property = String.format("%s.%s", zkServiceConfigString, field.getName());
String getter = String.format(
"get%s%s",
field.getName().substring(0, 1).toUpperCase(),
field.getName().substring(1)
);
Method method = ZkPathsConfig.class.getDeclaredMethod(getter);
Assert.assertEquals(propertyValues.get(property), method.invoke(zkPathsConfig));
++assertions;
}
}
}
private void validateEntries(IndexerZkConfig indexerZkConfig)
throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
for (Field field : IndexerZkConfig.class.getDeclaredFields()) {
if (null != field.getAnnotation(JsonProperty.class)) {
String property = String.format("%s.%s", indexerPropertyString, field.getName());
String getter = String.format(
"get%s%s",
field.getName().substring(0, 1).toUpperCase(),
field.getName().substring(1)
);
Method method = IndexerZkConfig.class.getDeclaredMethod(getter);
Assert.assertEquals(propertyValues.get(property), method.invoke(indexerZkConfig));
++assertions;
}
}
}
@Test
public void testNullConfig(){
propertyValues.clear();
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(zkServiceConfigString, ZkPathsConfig.class);
zkPathsConfig.inject(propertyValues, configurator);
JsonConfigProvider<IndexerZkConfig> indexerZkConfig = JsonConfigProvider.of(
indexerPropertyString,
IndexerZkConfig.class
);
indexerZkConfig.inject(propertyValues, configurator);
Assert.assertEquals("/druid/indexer/leaderLatchPath", indexerZkConfig.get().get().getLeaderLatchPath());
}
@Test
public void testSimpleConfig() throws IllegalAccessException, NoSuchMethodException, InvocationTargetException
{
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(zkServiceConfigString, ZkPathsConfig.class);
zkPathsConfig.inject(propertyValues, configurator);
JsonConfigProvider<IndexerZkConfig> indexerZkConfig = JsonConfigProvider.of(
indexerPropertyString,
IndexerZkConfig.class
);
indexerZkConfig.inject(propertyValues, configurator);
IndexerZkConfig zkConfig = indexerZkConfig.get().get();
ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get().get();
validateEntries(zkConfig);
validateEntries(zkPathsConfig1);
Assert.assertEquals(clobberableProperties.size(), assertions);
}
@Test
public void testExactConfig(){
final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
ImmutableList.<Module>of(simpleZkConfigModule)
);
propertyValues.setProperty(zkServiceConfigString + ".base", "/druid/metrics");
JsonConfigurator configurator = injector.getBinding(JsonConfigurator.class).getProvider().get();
JsonConfigProvider<ZkPathsConfig> zkPathsConfig = JsonConfigProvider.of(
zkServiceConfigString,
ZkPathsConfig.class
);
zkPathsConfig.inject(propertyValues, configurator);
ZkPathsConfig zkPathsConfig1 = zkPathsConfig.get().get();
IndexerZkConfig indexerZkConfig = new IndexerZkConfig(zkPathsConfig1,null,null,null,null,null);
Assert.assertEquals("indexer", indexerZkConfig.getBase());
Assert.assertEquals("/druid/metrics/indexer/announcements", indexerZkConfig.getAnnouncementsPath());
}
}


@ -0,0 +1,90 @@
Integration Testing
=========================
## Installing Docker and Running
Please refer to the instructions at [https://github.com/druid-io/docker-druid/blob/master/docker-install.md](https://github.com/druid-io/docker-druid/blob/master/docker-install.md).
Instead of running
```
boot2docker init
```
run
```
boot2docker init -m 6000
```
Make sure that you have at least 6GB of memory available before you run the tests.
Set the Docker IP via:
```
export DOCKER_IP=$(boot2docker ip 2>/dev/null)
```
Verify that Docker is running by issuing the following command:
```
docker info
```
Running Integration Tests
=========================
## Running tests using mvn
To run all the tests using mvn, run the following command:
```
mvn verify -P integration-tests
```
To run only a single test using mvn, run the following command:
```
mvn verify -P integration-tests -Dit.test=<test_name>
```
Writing a New Test
===============
## What should we cover in integration tests
Every end-user facing piece of functionality provided by Druid should be covered by an integration test that verifies its correctness.
## Rules to be followed while writing a new integration test
### Every Integration Test must follow these rules
1) The name of the test class must start with the prefix "IT"
2) A test should be independent of other tests
3) Tests are to be written in TestNG style ([http://testng.org/doc/documentation-main.html#methods](http://testng.org/doc/documentation-main.html#methods))
4) If a test loads data into the cluster, it is the test's responsibility to clean that data up afterwards
### How to use Guice Dependency Injection in a test
A test can use the helper and utility classes provided by the test framework to access the Coordinator, Broker, and other services.
To make a test eligible for Guice dependency injection, annotate the test class with the following annotation:
```
@Guice(moduleFactory = DruidTestModuleFactory.class)
```
This tells the test framework that the test class needs to be constructed using Guice.
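For illustration, a minimal sketch of such a test class is shown below. It is not part of this patch: the class name, the test body, and the import path of `DruidTestModuleFactory` are assumptions, while `IntegrationTestingConfig` and its `getCoordinatorHost()` method come from the test framework described here.
```
import com.google.inject.Inject;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.guice.DruidTestModuleFactory; // import path assumed
import org.testng.Assert;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

// Hypothetical test class; the "IT" prefix lets the failsafe plugin pick it up.
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITExampleTest
{
  // Injected by Guice when the framework constructs the test class.
  @Inject
  private IntegrationTestingConfig config;

  @Test
  public void testCoordinatorHostIsConfigured()
  {
    // The injected config exposes the endpoints of the dockerized cluster.
    Assert.assertNotNull(config.getCoordinatorHost());
  }
}
```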
### Helper Classes provided
1) IntegrationTestingConfig - configuration of the test
2) CoordinatorResourceTestClient - HTTP client for Coordinator endpoints
3) OverlordResourceTestClient - HTTP client for Overlord (indexer) endpoints
4) QueryResourceTestClient - HTTP client for Broker endpoints
### Static Utility classes
1) RetryUtil - provides methods to retry an operation until it succeeds, up to a configurable number of attempts
2) FromFileTestQueryHelper - reads queries and expected results from a file, executes the queries, and verifies the results using ResultVerifier
Refer to ITIndexerTest as an example of how to use dependency injection; a rough usage sketch of the utility classes follows.
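The fragment below is a hypothetical sketch only: the `RetryUtil.retryUntilTrue` name and signature, and the `areSegmentsLoaded()` call on the injected Coordinator client, are assumptions that may differ from the actual test-framework API.
```
// Hypothetical fragment, assumed to live inside a test class that has a
// CoordinatorResourceTestClient injected as "coordinatorClient".
// Method names and signatures are assumptions, not taken from this patch.
RetryUtil.retryUntilTrue(
    new java.util.concurrent.Callable<Boolean>()
    {
      @Override
      public Boolean call() throws Exception
      {
        // Poll the Coordinator until the test datasource's segments are loaded.
        return coordinatorClient.areSegmentsLoaded("example_datasource");
      }
    },
    "Segment load for datasource[example_datasource]"
);
```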
TODOs
=======================
1) Remove the patch for TestNG once Surefire-622 is resolved


@ -0,0 +1,71 @@
FROM ubuntu:14.04
# Add Java 7 repository
RUN apt-get update
RUN apt-get install -y software-properties-common
RUN apt-add-repository -y ppa:webupd8team/java
RUN apt-get update
# Oracle Java 7
RUN echo oracle-java-7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get install -y oracle-java7-installer
RUN apt-get install -y oracle-java7-set-default
# MySQL (Metadata store)
RUN apt-get install -y mysql-server
# Supervisor
RUN apt-get install -y supervisor
# Maven
RUN wget -q -O - http://www.us.apache.org/dist/maven/maven-3/3.2.5/binaries/apache-maven-3.2.5-bin.tar.gz | tar -xzf - -C /usr/local
RUN ln -s /usr/local/apache-maven-3.2.5 /usr/local/apache-maven
RUN ln -s /usr/local/apache-maven/bin/mvn /usr/local/bin/mvn
# Zookeeper
RUN wget -q -O - http://www.us.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xzf - -C /usr/local
RUN cp /usr/local/zookeeper-3.4.6/conf/zoo_sample.cfg /usr/local/zookeeper-3.4.6/conf/zoo.cfg
RUN ln -s /usr/local/zookeeper-3.4.6 /usr/local/zookeeper
# git
RUN apt-get install -y git
# Druid system user
RUN adduser --system --group --no-create-home druid
RUN mkdir -p /var/lib/druid
RUN chown druid:druid /var/lib/druid
# Add druid jars
ADD lib/* /usr/local/druid/lib/
WORKDIR /
# Setup metadata store
RUN /etc/init.d/mysql start && echo "GRANT ALL ON druid.* TO 'druid'@'%' IDENTIFIED BY 'diurd'; CREATE database druid;" | mysql -u root && /etc/init.d/mysql stop
# Add sample data
RUN /etc/init.d/mysql start && java -Ddruid.metadata.storage.type=mysql -cp "/usr/local/druid/lib/*" io.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd && /etc/init.d/mysql stop
ADD sample-data.sql sample-data.sql
RUN /etc/init.d/mysql start && cat sample-data.sql | mysql -u root druid && /etc/init.d/mysql stop
# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Clean up
RUN apt-get clean && rm -rf /tmp/* /var/tmp/*
# Expose ports:
# - 8081: HTTP (coordinator)
# - 8082: HTTP (broker)
# - 8083: HTTP (historical)
# - 3306: MySQL
# - 2181 2888 3888: ZooKeeper
# - 8100 8101 8102 8103 8104 : peon ports
EXPOSE 8081
EXPOSE 8082
EXPOSE 8083
EXPOSE 3306
EXPOSE 2181 2888 3888
EXPOSE 8100 8101 8102 8103 8104
WORKDIR /var/lib/druid
ENTRYPOINT export HOST_IP="$(resolveip -s $HOSTNAME)" && exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf


@ -0,0 +1,29 @@
[program:druid-broker]
command=java
-server
-Xmx1g
-Xms1g
-XX:NewSize=500m
-XX:MaxNewSize=500m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.processing.buffer.sizeBytes=75000000
-Ddruid.server.http.numThreads=100
-Ddruid.processing.numThreads=1
-Ddruid.broker.http.numConnections=30
-Ddruid.broker.http.readTimeout=PT5M
-Ddruid.broker.cache.useCache=true
-Ddruid.broker.cache.populateCache=true
-Ddruid.cache.type=local
-Ddruid.cache.sizeInBytes=40000000
-cp /usr/local/druid/lib/*
io.druid.cli.Main server broker
redirect_stderr=true
autorestart=false
priority=100
stdout_logfile=/shared/logs/broker.log


@ -0,0 +1,23 @@
[program:druid-coordinator]
command=java
-server
-Xmx128m
-Xms128m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.metadata.storage.type=mysql
-Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://druid-metadata-storage/druid
-Ddruid.metadata.storage.connector.user=druid
-Ddruid.metadata.storage.connector.password=diurd
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.coordinator.startDelay=PT5S
-cp /usr/local/druid/lib/*
io.druid.cli.Main server coordinator
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/coordinator.log


@ -0,0 +1,27 @@
[program:druid-historical]
command=java
-server
-Xmx1500m
-Xms1500m
-XX:NewSize=750m
-XX:MaxNewSize=750m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ
-Ddruid.s3.secretKey=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
-Ddruid.processing.buffer.sizeBytes=75000000
-Ddruid.processing.numThreads=3
-Ddruid.server.http.numThreads=100
-Ddruid.segmentCache.locations="[{\"path\":\"/shared/druid/indexCache\",\"maxSize\":5000000000}]"
-Ddruid.server.maxSize=5000000000
-cp /usr/local/druid/lib/*
io.druid.cli.Main server historical
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/historical.log


@ -0,0 +1,6 @@
[program:mysql]
command=/usr/bin/pidproxy /var/run/mysqld/mysqld.pid /usr/bin/mysqld_safe
--bind-address=0.0.0.0
user=mysql
priority=0
stdout_logfile=/shared/logs/mysql.log


@ -0,0 +1,29 @@
[program:druid-middlemanager]
command=java
-server
-Xmx64m
-Xms64m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.indexer.logs.directory=/shared/tasklogs
-Ddruid.storage.storageDirectory=/shared/storage
-Ddruid.indexer.runner.javaOpts=-server -Xmx256m -Xms256m -XX:NewSize=128m -XX:MaxNewSize=128m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
-Ddruid.indexer.fork.property.druid.processing.buffer.sizeBytes=75000000
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.server.http.numThreads=100
-Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ
-Ddruid.s3.secretKey=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
-Ddruid.worker.ip=%(ENV_HOST_IP)s
-Ddruid.selectors.indexing.serviceName=druid:overlord
-Ddruid.indexer.task.chathandler.type=announce
-cp /usr/local/druid/lib/*
io.druid.cli.Main server middleManager
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/middlemanager.log


@ -0,0 +1,25 @@
[program:druid-overlord]
command=java
-server
-Xmx128m
-Xms128m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.metadata.storage.type=mysql
-Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://druid-metadata-storage/druid
-Ddruid.metadata.storage.connector.user=druid
-Ddruid.metadata.storage.connector.password=diurd
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.indexer.storage.type=metadata
-Ddruid.indexer.logs.directory=/shared/tasklogs
-Ddruid.indexer.runner.type=remote
-cp /usr/local/druid/lib/*
io.druid.cli.Main server overlord
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/overlord.log


@ -0,0 +1,20 @@
[program:druid-router]
command=java
-server
-Xmx1g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=%(ENV_HOST_IP)s
-Ddruid.zk.service.host=druid-zookeeper
-Ddruid.computation.buffer.size=75000000
-Ddruid.server.http.numThreads=100
-Ddruid.processing.numThreads=1
-cp /usr/local/druid/lib/*
io.druid.cli.Main server router
redirect_stderr=true
priority=100
autorestart=false
stdout_logfile=/shared/logs/router.log


@ -0,0 +1,5 @@
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9','twitterstream','2013-05-13T01:08:18.192Z','2013-01-01T00:00:00.000Z','2013-01-02T00:00:00.000Z',0,'2013-01-02T04:13:41.980Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z\",\"version\":\"2013-01-02T04:13:41.980Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/2013-01-02T04:13:41.980Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":445235220,\"identifier\":\"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9','twitterstream','2013-05-13T00:03:28.640Z','2013-01-02T00:00:00.000Z','2013-01-03T00:00:00.000Z',0,'2013-01-03T03:44:58.791Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z\",\"version\":\"2013-01-03T03:44:58.791Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z/2013-01-03T03:44:58.791Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":435325540,\"identifier\":\"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}');
INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}');
INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');


@ -0,0 +1,6 @@
[supervisord]
nodaemon=true
[include]
files = /usr/lib/druid/conf/*.conf


@ -0,0 +1,5 @@
[program:zookeeper]
command=/usr/local/zookeeper/bin/zkServer.sh start-foreground
user=daemon
priority=0
stdout_logfile=/shared/logs/zookeeper.log

151
integration-tests/pom.xml Normal file

@ -0,0 +1,151 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Druid - a distributed column store.
~ Copyright (C) 2012, 2013 Metamarkets Group Inc.
~
~ This program is free software; you can redistribute it and/or
~ modify it under the terms of the GNU General Public License
~ as published by the Free Software Foundation; either version 2
~ of the License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU General Public License for more details.
~
~ You should have received a copy of the GNU General Public License
~ along with this program; if not, write to the Free Software
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.druid</groupId>
<artifactId>druid-integration-tests</artifactId>
<name>druid-integration-tests</name>
<description>druid-integration-tests</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-common</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>druid-histogram</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid.extensions</groupId>
<artifactId>mysql-metadata-storage</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-services</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>io.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Tests -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/IT*.java</exclude>
</excludes>
<systemPropertyVariables>
<user.timezone>UTC</user.timezone>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>integration-tests</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<id>build-and-start-druid-cluster</id>
<goals>
<goal>exec</goal>
</goals>
<phase>pre-integration-test</phase>
<configuration>
<executable>${project.basedir}/run_cluster.sh</executable>
</configuration>
</execution>
<execution>
<id>stop-druid-cluster</id>
<goals>
<goal>exec</goal>
</goals>
<phase>post-integration-test</phase>
<configuration>
<executable>${project.basedir}/stop_cluster.sh</executable>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<executions>
<execution>
<id>integration-tests</id>
<phase>integration-test</phase>
<goals>
<goal>integration-test</goal>
<goal>verify</goal>
</goals>
</execution>
</executions>
<configuration>
<argLine>-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dtestrunfactory=org.testng.DruidTestRunnerFactory
-Ddruid.test.config.dockerIp=${env.DOCKER_IP} -Ddruid.zk.service.host=${env.DOCKER_IP}
</argLine>
<suiteXmlFiles>
<suiteXmlFile>src/test/resources/testng.xml</suiteXmlFile>
</suiteXmlFiles>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>


@ -0,0 +1,49 @@
# cleanup
for node in druid-historical druid-coordinator druid-overlord druid-router druid-broker druid-middlemanager druid-zookeeper druid-metadata-storage;
do
docker stop $node
docker rm $node
done
# environment variables
DIR=$(cd $(dirname $0) && pwd)
DOCKERDIR=$DIR/docker
SHARED_DIR=${HOME}/shared
SUPERVISORDIR=/usr/lib/druid/conf
RESOURCEDIR=$DIR/src/test/resources
# Make directories if they don't exist
mkdir -p $SHARED_DIR/logs
mkdir -p $SHARED_DIR/tasklogs
# install druid jars
rm -rf $SHARED_DIR/docker
cp -R docker $SHARED_DIR/docker
mvn dependency:copy-dependencies -DoutputDirectory=$SHARED_DIR/docker/lib
# Build Druid Cluster Image
docker build -t druid/cluster $SHARED_DIR/docker
# Start zookeeper
docker run -d --name druid-zookeeper -p 2181:2181 -v $SHARED_DIR:/shared -v $DOCKERDIR/zookeeper.conf:$SUPERVISORDIR/zookeeper.conf druid/cluster
# Start MySQL metadata storage
docker run -d --name druid-metadata-storage -v $SHARED_DIR:/shared -v $DOCKERDIR/metadata-storage.conf:$SUPERVISORDIR/metadata-storage.conf druid/cluster
# Start Overlord
docker run -d --name druid-overlord -p 8090:8090 -v $SHARED_DIR:/shared -v $DOCKERDIR/overlord.conf:$SUPERVISORDIR/overlord.conf --link druid-metadata-storage:druid-metadata-storage --link druid-zookeeper:druid-zookeeper druid/cluster
# Start Coordinator
docker run -d --name druid-coordinator -p 8081:8081 -v $SHARED_DIR:/shared -v $DOCKERDIR/coordinator.conf:$SUPERVISORDIR/coordinator.conf --link druid-overlord:druid-overlord --link druid-metadata-storage:druid-metadata-storage --link druid-zookeeper:druid-zookeeper druid/cluster
# Start Historical
docker run -d --name druid-historical -v $SHARED_DIR:/shared -v $DOCKERDIR/historical.conf:$SUPERVISORDIR/historical.conf --link druid-zookeeper:druid-zookeeper druid/cluster
# Start MiddleManager
docker run -d --name druid-middlemanager -p 8100:8100 -p 8101:8101 -p 8102:8102 -p 8103:8103 -p 8104:8104 -p 8105:8105 -v $RESOURCEDIR:/resources -v $SHARED_DIR:/shared -v $DOCKERDIR/middlemanager.conf:$SUPERVISORDIR/middlemanager.conf --link druid-zookeeper:druid-zookeeper --link druid-overlord:druid-overlord druid/cluster
# Start Broker
docker run -d --name druid-broker -v $SHARED_DIR:/shared -v $DOCKERDIR/broker.conf:$SUPERVISORDIR/broker.conf --link druid-zookeeper:druid-zookeeper --link druid-middlemanager:druid-middlemanager --link druid-historical:druid-historical druid/cluster
# Start Router
docker run -d --name druid-router -p 8888:8888 -v $SHARED_DIR:/shared -v $DOCKERDIR/router.conf:$SUPERVISORDIR/router.conf --link druid-zookeeper:druid-zookeeper --link druid-coordinator:druid-coordinator --link druid-broker:druid-broker druid/cluster

View File

@ -0,0 +1,67 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
import com.fasterxml.jackson.annotation.JsonProperty;
import javax.validation.constraints.NotNull;
public class DockerConfigProvider implements IntegrationTestingConfigProvider
{
@JsonProperty
@NotNull
private String dockerIp;
@Override
public IntegrationTestingConfig get()
{
return new IntegrationTestingConfig()
{
@Override
public String getCoordinatorHost()
{
return dockerIp+":8081";
}
@Override
public String getIndexerHost()
{
return dockerIp+":8090";
}
@Override
public String getRouterHost()
{
return dockerIp+ ":8888";
}
@Override
public String getMiddleManagerHost()
{
return dockerIp;
}
};
}
}

View File

@ -0,0 +1,33 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
/**
*/
public interface IntegrationTestingConfig
{
public String getCoordinatorHost();
public String getIndexerHost();
public String getRouterHost();
public String getMiddleManagerHost();
}

View File

@ -0,0 +1,32 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.google.inject.Provider;
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = DockerConfigProvider.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "docker", value = DockerConfigProvider.class)
})
public interface IntegrationTestingConfigProvider extends Provider<IntegrationTestingConfig>
{
}
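
The annotations above make the "type" field of the test configuration select the provider implementation, with DockerConfigProvider as the fallback. As a hedged illustration only (not part of this change), the same polymorphic resolution can be exercised with a plain Jackson ObjectMapper; the JSON literal and IP address below are made-up values.

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.IntegrationTestingConfigProvider;

public class ConfigProviderSketch
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    // "type" picks the subtype; omitting it also resolves to DockerConfigProvider via defaultImpl
    String json = "{\"type\": \"docker\", \"dockerIp\": \"192.168.59.103\"}";
    IntegrationTestingConfigProvider provider = mapper.readValue(json, IntegrationTestingConfigProvider.class);
    IntegrationTestingConfig config = provider.get();
    System.out.println(config.getRouterHost()); // prints 192.168.59.103:8888
  }
}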

View File

@ -0,0 +1,146 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.RequestBuilder;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.joda.time.Interval;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Map;
public class CoordinatorResourceTestClient
{
private final static Logger LOG = new Logger(CoordinatorResourceTestClient.class);
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String coordinator;
private final StatusResponseHandler responseHandler;
@Inject
CoordinatorResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient, IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.coordinator = config.getCoordinatorHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getCoordinatorURL()
{
return String.format(
"http://%s/druid/coordinator/v1/",
coordinator
);
}
private Map<String, Integer> getLoadStatus()
{
Map<String, Integer> status = null;
try {
StatusResponseHolder response = makeRequest(HttpMethod.GET, getCoordinatorURL() + "loadstatus?simple");
status = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Integer>>()
{
}
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
return status;
}
public boolean areSegmentsLoaded(String dataSource)
{
final Map<String, Integer> status = getLoadStatus();
return (status.containsKey(dataSource) && status.get(dataSource) == 0);
}
public void unloadSegmentsForDataSource(String dataSource, Interval interval)
{
killDataSource(dataSource, false, interval);
}
public void deleteSegmentsDataSource(String dataSource, Interval interval)
{
killDataSource(dataSource, true, interval);
}
private void killDataSource(String dataSource, boolean kill, Interval interval)
{
try {
makeRequest(
HttpMethod.DELETE,
String.format(
"%sdatasources/%s?kill=%s&interval=%s",
getCoordinatorURL(),
dataSource, kill, URLEncoder.encode(interval.toString(), "UTF-8")
)
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
private StatusResponseHolder makeRequest(HttpMethod method, String url)
{
try {
StatusResponseHolder response = new RequestBuilder(
this.httpClient,
method, new URL(url)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while making request to url[%s] status[%s] content[%s]",
url,
response.getStatus(),
response.getContent()
);
}
return response;
}
catch (Exception e) {
LOG.error(e, "Exception while sending request");
throw Throwables.propagate(e);
}
}
}
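
As a usage sketch only, not part of this change: a test can pair this client with RetryUtil, added later in this patch, to block until a datasource is fully loaded. The injector lookup mirrors what the test module wires up; the datasource name is the one used by the indexer tests below.

import com.google.inject.Injector;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;

import java.util.concurrent.Callable;

public class WaitForSegmentsSketch
{
  public static void main(String[] args)
  {
    Injector injector = DruidTestModuleFactory.getInjector();
    final CoordinatorResourceTestClient coordinator = injector.getInstance(CoordinatorResourceTestClient.class);
    RetryUtil.retryUntilTrue(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            // "wikipedia_index_test" is the datasource used by the indexer tests in this patch
            return coordinator.areSegmentsLoaded("wikipedia_index_test");
          }
        },
        "Segments loaded for wikipedia_index_test"
    );
  }
}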

View File

@ -0,0 +1,135 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.metamx.common.ISE;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import javax.ws.rs.core.MediaType;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
public class EventReceiverFirehoseTestClient
{
private final String host;
private final StatusResponseHandler responseHandler;
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String chatID;
public EventReceiverFirehoseTestClient(String host, String chatID, ObjectMapper jsonMapper, HttpClient httpClient)
{
this.host = host;
this.jsonMapper = jsonMapper;
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
this.httpClient = httpClient;
this.chatID = chatID;
}
private String getURL()
{
return String.format(
"http://%s/druid/worker/v1/chat/%s/push-events/",
host,
chatID
);
}
/**
* Posts events from the given collection to the event receiver firehose and returns the count of events accepted.
*
* @param events collection of events to be posted
*
* @return the number of events accepted, as reported by the firehose endpoint
*/
public int postEvents(Collection<Map<String, Object>> events)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getURL()))
.setContent(
MediaType.APPLICATION_JSON,
this.jsonMapper.writeValueAsBytes(events)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while posting events to url[%s] status[%s] content[%s]",
getURL(),
response.getStatus(),
response.getContent()
);
}
Map<String, Integer> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Integer>>()
{
}
);
return responseData.get("eventCount");
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public int postEventsFromFile(String file)
{
try {
BufferedReader reader = new BufferedReader(
new InputStreamReader(
EventReceiverFirehoseTestClient.class.getResourceAsStream(
file
)
)
);
String s;
Collection<Map<String, Object>> events = new ArrayList<Map<String, Object>>();
while ((s = reader.readLine()) != null) {
events.add(
(Map<String, Object>) this.jsonMapper.readValue(
s, new TypeReference<Map<String, Object>>()
{
}
)
);
}
int eventsPosted = postEvents(events);
if (eventsPosted != events.size()) {
throw new ISE("All events not posted, expected : %d actual : %d", events.size(), eventsPosted);
}
return eventsPosted;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}
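
A hedged usage sketch of pushing a single hand-built event through this client. The host, chat ID, and event fields are placeholders; the real test (ITRealtimeIndexTaskTest, later in this patch) resolves the host through service discovery and posts events from a resource file.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.metamx.http.client.HttpClient;
import io.druid.guice.annotations.Global;
import io.druid.testing.clients.EventReceiverFirehoseTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;

import java.util.Collections;
import java.util.Map;

public class PostEventsSketch
{
  public static void main(String[] args)
  {
    Injector injector = DruidTestModuleFactory.getInjector();
    HttpClient httpClient = injector.getInstance(Key.get(HttpClient.class, Global.class));
    // host and chat ID are placeholders; real tests discover them at runtime
    EventReceiverFirehoseTestClient client = new EventReceiverFirehoseTestClient(
        "10.0.0.2:8100", "eventReceiverServiceName", new ObjectMapper(), httpClient
    );
    Map<String, Object> event = ImmutableMap.<String, Object>of(
        "timestamp", "2013-08-31T01:02:33Z", "page", "placeholder-page"
    );
    int accepted = client.postEvents(Collections.<Map<String, Object>>singletonList(event));
    System.out.println("events accepted: " + accepted);
  }
}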

View File

@ -0,0 +1,213 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.task.Task;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.utils.RetryUtil;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
public class OverlordResourceTestClient
{
private final static Logger LOG = new Logger(OverlordResourceTestClient.class);
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String indexer;
private final StatusResponseHandler responseHandler;
@Inject
OverlordResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient, IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.indexer = config.getIndexerHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getIndexerURL()
{
return String.format(
"http://%s/druid/indexer/v1/",
indexer
);
}
public String submitTask(Task task)
{
try {
return submitTask(this.jsonMapper.writeValueAsString(task));
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public String submitTask(String task)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getIndexerURL() + "task"))
.setContent(
"application/json",
task.getBytes()
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while submitting task to indexer response [%s %s]",
response.getStatus(),
response.getContent()
);
}
Map<String, String> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, String>>()
{
}
);
String taskID = responseData.get("task");
LOG.info("Submitted task with TaskID[%s]", taskID);
return taskID;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public TaskStatus.Status getTaskStatus(String taskID)
{
try {
StatusResponseHolder response = makeRequest(
String.format(
"%stask/%s/status",
getIndexerURL(),
URLEncoder.encode(taskID, "UTF-8")
)
);
LOG.info("Index status response" + response.getContent());
Map<String, Object> responseData = jsonMapper.readValue(
response.getContent(), new TypeReference<Map<String, Object>>()
{
}
);
//TODO: figure out a better way to parse the response...
String status = (String) ((Map) responseData.get("status")).get("status");
return TaskStatus.Status.valueOf(status);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public List<TaskResponseObject> getRunningTasks()
{
return getTasks("runningTasks");
}
public List<TaskResponseObject> getWaitingTasks()
{
return getTasks("waitingTasks");
}
public List<TaskResponseObject> getPendingTasks()
{
return getTasks("pendingTasks");
}
private List<TaskResponseObject> getTasks(String identifier)
{
try {
StatusResponseHolder response = makeRequest(
String.format("%s%s", getIndexerURL(), identifier)
);
LOG.info("Tasks %s response %s", identifier, response.getContent());
return jsonMapper.readValue(
response.getContent(), new TypeReference<List<TaskResponseObject>>()
{
}
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public void waitUntilTaskCompletes(final String taskID)
{
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
TaskStatus.Status status = getTaskStatus(taskID);
if (status == TaskStatus.Status.FAILED) {
throw new ISE("Indexer task FAILED");
}
return status == TaskStatus.Status.SUCCESS;
}
},
true,
60000,
10,
"Index Task to complete"
);
}
private StatusResponseHolder makeRequest(String url)
{
try {
StatusResponseHolder response = this.httpClient
.get(new URL(url))
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE("Error while making request to indexer [%s %s]", response.getStatus(), response.getContent());
}
return response;
}
catch (Exception e) {
LOG.error(e, "Exception while sending request");
throw Throwables.propagate(e);
}
}
}
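
A usage sketch of the submit-and-wait flow against a running cluster (for example one started by the run_cluster.sh script above); the task spec path is the wikipedia index task already used by ITIndexerTest below, and this snippet is illustrative rather than part of the change.

import com.google.inject.Injector;
import io.druid.testing.clients.OverlordResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import org.apache.commons.io.IOUtils;

public class SubmitTaskSketch
{
  public static void main(String[] args) throws Exception
  {
    Injector injector = DruidTestModuleFactory.getInjector();
    OverlordResourceTestClient overlord = injector.getInstance(OverlordResourceTestClient.class);
    String taskJson = IOUtils.toString(
        SubmitTaskSketch.class.getResourceAsStream("/indexer/wikipedia_index_task.json"), "UTF-8"
    );
    String taskId = overlord.submitTask(taskJson);
    // blocks until the task reaches SUCCESS, failing fast if it reports FAILED
    overlord.waitUntilTaskCompletes(taskId);
    System.out.println("still running: " + overlord.getRunningTasks().size());
  }
}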

View File

@ -0,0 +1,98 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.query.Query;
import io.druid.testing.IntegrationTestingConfig;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import java.net.URL;
import java.util.List;
import java.util.Map;
public class QueryResourceTestClient
{
private final ObjectMapper jsonMapper;
private final HttpClient httpClient;
private final String router;
private final StatusResponseHandler responseHandler;
@Inject
QueryResourceTestClient(
ObjectMapper jsonMapper,
@Global HttpClient httpClient,
IntegrationTestingConfig config
)
{
this.jsonMapper = jsonMapper;
this.httpClient = httpClient;
this.router = config.getRouterHost();
this.responseHandler = new StatusResponseHandler(Charsets.UTF_8);
}
private String getBrokerURL()
{
return String.format(
"http://%s/druid/v2/",
router
);
}
public List<Map<String, Object>> query(Query query)
{
try {
StatusResponseHolder response = httpClient.post(new URL(getBrokerURL()))
.setContent(
"application/json",
jsonMapper.writeValueAsBytes(query)
)
.go(responseHandler)
.get();
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while querying[%s] status[%s] content[%s]",
getBrokerURL(),
response.getStatus(),
response.getContent()
);
}
return jsonMapper.readValue(
response.getContent(), new TypeReference<List<Map<String, Object>>>()
{
}
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}

View File

@ -0,0 +1,68 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.clients;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexing.common.TaskStatus;
import org.joda.time.DateTime;
public class TaskResponseObject
{
private final String id;
private final DateTime createdTime;
private final DateTime queueInsertionTime;
private final TaskStatus status;
@JsonCreator
private TaskResponseObject(
@JsonProperty("id") String id,
@JsonProperty("createdTime") DateTime createdTime,
@JsonProperty("queueInsertionTime") DateTime queueInsertionTime,
@JsonProperty("status") TaskStatus status
)
{
this.id = id;
this.createdTime = createdTime;
this.queueInsertionTime = queueInsertionTime;
this.status = status;
}
public String getId()
{
return id;
}
public DateTime getCreatedTime()
{
return createdTime;
}
public DateTime getQueueInsertionTime()
{
return queueInsertionTime;
}
public TaskStatus getStatus()
{
return status;
}
}

View File

@ -0,0 +1,54 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.guice;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.inject.Binder;
import com.google.inject.Module;
import com.google.inject.Provides;
import com.metamx.emitter.core.LoggingEmitter;
import com.metamx.emitter.core.LoggingEmitterConfig;
import com.metamx.emitter.service.ServiceEmitter;
import io.druid.guice.JsonConfigProvider;
import io.druid.guice.LazySingleton;
import io.druid.guice.ManageLifecycle;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.IntegrationTestingConfigProvider;
/**
*/
public class DruidTestModule implements Module
{
@Override
public void configure(Binder binder)
{
binder.bind(IntegrationTestingConfig.class).toProvider(IntegrationTestingConfigProvider.class).in(ManageLifecycle.class);
JsonConfigProvider.bind(binder, "druid.test.config", IntegrationTestingConfigProvider.class);
}
@Provides
@LazySingleton
public ServiceEmitter getServiceEmitter(Supplier<LoggingEmitterConfig> config, ObjectMapper jsonMapper)
{
return new ServiceEmitter("", "", new LoggingEmitter(config.get(), jsonMapper));
}
}

View File

@ -0,0 +1,63 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.guice;
import com.google.common.collect.ImmutableList;
import com.google.inject.Injector;
import com.google.inject.Module;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.IndexingServiceFirehoseModule;
import io.druid.initialization.Initialization;
import org.testng.IModuleFactory;
import org.testng.ITestContext;
import java.util.Collections;
import java.util.List;
public class DruidTestModuleFactory implements IModuleFactory
{
private static final Module module = new DruidTestModule();
private static final Injector injector = Initialization.makeInjectorWithModules(
GuiceInjectors.makeStartupInjector(),
getModules()
);
public static Injector getInjector()
{
return injector;
}
private static List<? extends Module> getModules()
{
return ImmutableList.of(
new DruidTestModule(),
new IndexingServiceFirehoseModule()
);
}
@Override
public Module createModule(ITestContext context, Class<?> testClass)
{
context.addGuiceModule(DruidTestModule.class, module);
context.addInjector(Collections.singletonList(module), injector);
return module;
}
}

View File

@ -0,0 +1,79 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.testing.clients.QueryResourceTestClient;
import java.util.List;
import java.util.Map;
public class FromFileTestQueryHelper
{
private static final Logger LOG = new Logger(FromFileTestQueryHelper.class);
private final QueryResourceTestClient queryClient;
private final ObjectMapper jsonMapper;
@Inject
FromFileTestQueryHelper(ObjectMapper jsonMapper, QueryResourceTestClient queryClient)
{
this.jsonMapper = jsonMapper;
this.queryClient = queryClient;
}
public void testQueriesFromFile(String filePath, int timesToRun) throws Exception
{
LOG.info("Starting query tests for [%s]", filePath);
List<QueryWithResults> queries =
jsonMapper.readValue(
FromFileTestQueryHelper.class.getResourceAsStream(filePath),
new TypeReference<List<QueryWithResults>>()
{
}
);
for (int i = 0; i < timesToRun; i++) {
LOG.info("Starting Iteration " + i);
boolean failed = false;
for (QueryWithResults queryWithResult : queries) {
LOG.info("Running Query " + queryWithResult.getQuery().getType());
List<Map<String, Object>> result = queryClient.query(queryWithResult.getQuery());
if (!QueryResultVerifier.compareResults(result, queryWithResult.getExpectedResults())) {
LOG.error(
"Failed while executing %s actualResults : %s",
queryWithResult,
jsonMapper.writeValueAsString(result)
);
failed = true;
} else {
LOG.info("Results Verified for Query " + queryWithResult.getQuery().getType());
}
}
if (failed) {
throw new ISE("one or more twitter queries failed");
}
}
}
}

View File

@ -0,0 +1,49 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import java.util.Iterator;
import java.util.Map;
public class QueryResultVerifier
{
public static boolean compareResults(
Iterable<Map<String, Object>> actual,
Iterable<Map<String, Object>> expected
)
{
Iterator<Map<String, Object>> actualIter = actual.iterator();
Iterator<Map<String, Object>> expectedIter = expected.iterator();
while (actualIter.hasNext() && expectedIter.hasNext()) {
Map<String, Object> actualRes = actualIter.next();
Map<String, Object> expRes = expectedIter.next();
if (!actualRes.equals(expRes)) {
return false;
}
}
if (actualIter.hasNext() || expectedIter.hasNext()) {
return false;
}
return true;
}
}
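
For illustration, a small self-contained check of the row-by-row comparison; the row values are made up. Rows are compared in order, and both iterables must have the same length for the result to be true.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.testing.utils.QueryResultVerifier;

import java.util.List;
import java.util.Map;

public class QueryResultVerifierSketch
{
  public static void main(String[] args)
  {
    List<Map<String, Object>> expected = ImmutableList.<Map<String, Object>>of(
        ImmutableMap.<String, Object>of("rows", 5, "edits", 27)
    );
    List<Map<String, Object>> same = ImmutableList.<Map<String, Object>>of(
        ImmutableMap.<String, Object>of("rows", 5, "edits", 27)
    );
    List<Map<String, Object>> shorter = ImmutableList.of();
    System.out.println(QueryResultVerifier.compareResults(same, expected));    // true
    System.out.println(QueryResultVerifier.compareResults(shorter, expected)); // false: lengths differ
  }
}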

View File

@ -0,0 +1,64 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.query.Query;
import java.util.List;
import java.util.Map;
public class QueryWithResults
{
private final Query query;
private final List<Map<String, Object>> expectedResults;
@JsonCreator
public QueryWithResults(
@JsonProperty("query") Query query,
@JsonProperty("expectedResults") List<Map<String, Object>> expectedResults
)
{
this.query = query;
this.expectedResults = expectedResults;
}
@JsonProperty
public Query getQuery()
{
return query;
}
@JsonProperty
public List<Map<String, Object>> getExpectedResults()
{
return expectedResults;
}
@Override
public String toString()
{
return "QueryWithResults{" +
"query=" + query +
", expectedResults=" + expectedResults +
'}';
}
}

View File

@ -0,0 +1,76 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.google.common.base.Throwables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
public class RetryUtil
{
private static final Logger LOG = new Logger(RetryUtil.class);
public static final int DEFAULT_RETRY_COUNT = 10;
public static final long DEFAULT_RETRY_SLEEP = TimeUnit.SECONDS.toMillis(30);
public static void retryUntilTrue(Callable<Boolean> callable, String task)
{
retryUntil(callable, true, DEFAULT_RETRY_SLEEP, DEFAULT_RETRY_COUNT, task);
}
public static void retryUntilFalse(Callable<Boolean> callable, String task)
{
retryUntil(callable, false, DEFAULT_RETRY_SLEEP, DEFAULT_RETRY_COUNT, task);
}
public static void retryUntil(
Callable<Boolean> callable,
boolean expectedValue,
long delayInMillis,
int retryCount,
String taskMessage
)
{
try {
int currentTry = 0;
while (callable.call() != expectedValue) {
if (currentTry > retryCount) {
throw new ISE("Max number of retries[%d] exceeded for Task[%s]. Failing.", retryCount, taskMessage);
}
LOG.info(
"Attempt[%d]: Task %s still not complete. Next retry in %d ms",
currentTry, taskMessage, delayInMillis
);
Thread.sleep(delayInMillis);
currentTry++;
}
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
}
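
A minimal usage sketch under stated assumptions: the condition here (a marker file appearing on the shared volume) is invented purely for illustration, while the real callers in this patch poll segment-load and task-queue state. With the defaults above, the condition is checked every 30 seconds, up to 10 times, before failing.

import io.druid.testing.utils.RetryUtil;

import java.io.File;
import java.util.concurrent.Callable;

public class RetryUtilSketch
{
  public static void main(String[] args)
  {
    // hypothetical condition: wait for a marker file on the shared volume
    final File marker = new File("/tmp/shared/ready");
    RetryUtil.retryUntilTrue(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            return marker.exists();
          }
        },
        "marker file to appear"
    );
  }
}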

View File

@ -0,0 +1,64 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.testing.utils;
import com.metamx.common.logger.Logger;
import io.druid.client.selector.Server;
import io.druid.curator.discovery.ServerDiscoverySelector;
import java.util.concurrent.Callable;
public class ServerDiscoveryUtil
{
private static final Logger LOG = new Logger(ServerDiscoveryUtil.class);
public static boolean isInstanceReady(ServerDiscoverySelector serviceProvider)
{
try {
Server instance = serviceProvider.pick();
if (instance == null) {
LOG.warn("Unable to find a host");
return false;
}
}
catch (Exception e) {
LOG.error(e, "Caught exception waiting for host");
return false;
}
return true;
}
public static void waitUntilInstanceReady(final ServerDiscoverySelector serviceProvider, String instanceType)
{
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return isInstanceReady(serviceProvider);
}
},
String.format("Instance %s to get ready", instanceType)
);
}
}

View File

@ -0,0 +1,150 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.testng;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.response.StatusResponseHandler;
import com.metamx.http.client.response.StatusResponseHolder;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.testng.internal.IConfiguration;
import org.testng.internal.annotations.IAnnotationFinder;
import org.testng.xml.XmlTest;
import java.net.URL;
import java.util.List;
import java.util.concurrent.Callable;
public class DruidTestRunnerFactory implements ITestRunnerFactory
{
private static final Logger LOG = new Logger(DruidTestRunnerFactory.class);
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest test, List<IInvokedMethodListener> listeners
)
{
IConfiguration configuration = TestNG.getDefault().getConfiguration();
String outputDirectory = suite.getOutputDirectory();
IAnnotationFinder annotationFinder = configuration.getAnnotationFinder();
Boolean skipFailedInvocationCounts = suite.getXmlSuite().skipFailedInvocationCounts();
return new DruidTestRunner(
configuration,
suite,
test,
outputDirectory,
annotationFinder,
skipFailedInvocationCounts,
listeners
);
}
private static class DruidTestRunner extends TestRunner
{
protected DruidTestRunner(
IConfiguration configuration,
ISuite suite,
XmlTest test,
String outputDirectory,
IAnnotationFinder finder,
boolean skipFailedInvocationCounts,
List<IInvokedMethodListener> invokedMethodListeners
)
{
super(configuration, suite, test, outputDirectory, finder, skipFailedInvocationCounts, invokedMethodListeners);
}
@Override
public void run()
{
Injector injector = DruidTestModuleFactory.getInjector();
IntegrationTestingConfig config = injector.getInstance(IntegrationTestingConfig.class);
HttpClient client = injector.getInstance(Key.get(HttpClient.class, Global.class));
waitUntilInstanceReady(client, config.getCoordinatorHost());
waitUntilInstanceReady(client, config.getIndexerHost());
waitUntilInstanceReady(client, config.getRouterHost());
Lifecycle lifecycle = injector.getInstance(Lifecycle.class);
try {
lifecycle.start();
runTests();
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
lifecycle.stop();
}
}
private void runTests()
{
super.run();
}
public void waitUntilInstanceReady(final HttpClient client, final String host)
{
final StatusResponseHandler handler = new StatusResponseHandler(Charsets.UTF_8);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
try {
StatusResponseHolder response = client.get(
new URL(
String.format(
"http://%s/status",
host
)
)
)
.go(handler)
.get();
LOG.info("Status[%s] content[%s]", response.getStatus(), response.getContent());
return response.getStatus().equals(HttpResponseStatus.OK);
}
catch (Throwable e) {
e.printStackTrace();
return false;
}
}
}, "Waiting for instance to be ready: [" + host + "]"
);
}
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,336 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.testng.remote;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.ParameterException;
import org.testng.CommandLineArgs;
import org.testng.IInvokedMethodListener;
import org.testng.ISuite;
import org.testng.ISuiteListener;
import org.testng.ITestRunnerFactory;
import org.testng.TestNG;
import org.testng.TestNGException;
import org.testng.TestRunner;
import org.testng.collections.Lists;
import org.testng.internal.ClassHelper;
import org.testng.remote.strprotocol.GenericMessage;
import org.testng.remote.strprotocol.IMessageSender;
import org.testng.remote.strprotocol.MessageHelper;
import org.testng.remote.strprotocol.MessageHub;
import org.testng.remote.strprotocol.RemoteTestListener;
import org.testng.remote.strprotocol.SerializedMessageSender;
import org.testng.remote.strprotocol.StringMessageSender;
import org.testng.remote.strprotocol.SuiteMessage;
import org.testng.reporters.JUnitXMLReporter;
import org.testng.reporters.TestHTMLReporter;
import org.testng.xml.XmlSuite;
import org.testng.xml.XmlTest;
import java.util.Arrays;
import java.util.List;
import static org.testng.internal.Utils.defaultIfStringEmpty;
/**
* Class copied from the TestNG library (version 6.8.7) to apply a workaround for http://jira.codehaus.org/browse/SUREFIRE-622.
* To locate the patched area, search for the keyword "PATCH" in this file.
* <p/>
* Extension of TestNG registering a remote TestListener.
*
* @author Cedric Beust <cedric@beust.com>
*/
public class RemoteTestNG extends TestNG
{
// The following constants are referenced by the Eclipse plug-in, make sure you
// modify the plug-in as well if you change any of them.
public static final String DEBUG_PORT = "12345";
public static final String DEBUG_SUITE_FILE = "testng-customsuite.xml";
public static final String DEBUG_SUITE_DIRECTORY = System.getProperty("java.io.tmpdir");
public static final String PROPERTY_DEBUG = "testng.eclipse.debug";
public static final String PROPERTY_VERBOSE = "testng.eclipse.verbose";
private static final String LOCALHOST = "localhost";
// End of Eclipse constants.
/**
* Port used for the serialized protocol
*/
private static Integer m_serPort = null;
private static boolean m_debug;
private static boolean m_dontExit;
private static boolean m_ack;
private ITestRunnerFactory m_customTestRunnerFactory;
private String m_host;
/**
* Port used for the string protocol
*/
private Integer m_port = null;
public static void main(String[] args) throws ParameterException
{
CommandLineArgs cla = new CommandLineArgs();
RemoteArgs ra = new RemoteArgs();
new JCommander(Arrays.asList(cla, ra), args);
m_dontExit = ra.dontExit;
if (cla.port != null && ra.serPort != null) {
throw new TestNGException(
"Can only specify one of " + CommandLineArgs.PORT
+ " and " + RemoteArgs.PORT
);
}
m_debug = cla.debug;
m_ack = ra.ack;
if (m_debug) {
// while (true) {
initAndRun(args, cla, ra);
// }
} else {
initAndRun(args, cla, ra);
}
}
private static void initAndRun(String[] args, CommandLineArgs cla, RemoteArgs ra)
{
RemoteTestNG remoteTestNg = new RemoteTestNG();
if (m_debug) {
// In debug mode, override the port and the XML file to a fixed location
cla.port = Integer.parseInt(DEBUG_PORT);
ra.serPort = cla.port;
cla.suiteFiles = Arrays.asList(
new String[]{
DEBUG_SUITE_DIRECTORY + DEBUG_SUITE_FILE
}
);
}
remoteTestNg.configure(cla);
remoteTestNg.setHost(cla.host);
m_serPort = ra.serPort;
remoteTestNg.m_port = cla.port;
if (isVerbose()) {
StringBuilder sb = new StringBuilder("Invoked with ");
for (String s : args) {
sb.append(s).append(" ");
}
p(sb.toString());
// remoteTestNg.setVerbose(1);
// } else {
// remoteTestNg.setVerbose(0);
}
validateCommandLineParameters(cla);
remoteTestNg.run();
// if (m_debug) {
// // Run in a loop if in debug mode so it is possible to run several launches
// // without having to relauch RemoteTestNG.
// while (true) {
// remoteTestNg.run();
// remoteTestNg.configure(cla);
// }
// } else {
// remoteTestNg.run();
// }
}
private static void p(String s)
{
if (isVerbose()) {
System.out.println("[RemoteTestNG] " + s);
}
}
public static boolean isVerbose()
{
boolean result = System.getProperty(PROPERTY_VERBOSE) != null || isDebug();
return result;
}
public static boolean isDebug()
{
return m_debug || System.getProperty(PROPERTY_DEBUG) != null;
}
private void calculateAllSuites(List<XmlSuite> suites, List<XmlSuite> outSuites)
{
for (XmlSuite s : suites) {
outSuites.add(s);
// calculateAllSuites(s.getChildSuites(), outSuites);
}
}
@Override
public void run()
{
IMessageSender sender = m_serPort != null
? new SerializedMessageSender(m_host, m_serPort, m_ack)
: new StringMessageSender(m_host, m_port);
final MessageHub msh = new MessageHub(sender);
msh.setDebug(isDebug());
try {
msh.connect();
// We couldn't do this until now in debug mode since the .xml file didn't exist yet.
// Now that we have connected with the Eclipse client, we know that it created the .xml
// file so we can proceed with the initialization
initializeSuitesAndJarFile();
List<XmlSuite> suites = Lists.newArrayList();
calculateAllSuites(m_suites, suites);
// System.out.println("Suites: " + m_suites.get(0).getChildSuites().size()
// + " and:" + suites.get(0).getChildSuites().size());
if (suites.size() > 0) {
int testCount = 0;
for (int i = 0; i < suites.size(); i++) {
testCount += (suites.get(i)).getTests().size();
}
GenericMessage gm = new GenericMessage(MessageHelper.GENERIC_SUITE_COUNT);
gm.setSuiteCount(suites.size());
gm.setTestCount(testCount);
msh.sendMessage(gm);
addListener(new RemoteSuiteListener(msh));
setTestRunnerFactory(new DelegatingTestRunnerFactory(buildTestRunnerFactory(), msh));
// System.out.println("RemoteTestNG starting");
super.run();
} else {
System.err.println("No test suite found. Nothing to run");
}
}
catch (Throwable cause) {
cause.printStackTrace(System.err);
}
finally {
// System.out.println("RemoteTestNG finishing: " + (getEnd() - getStart()) + " ms");
msh.shutDown();
if (!m_debug && !m_dontExit) {
System.exit(0);
}
}
}
/**
* Override by the plugin if you need to configure differently the <code>TestRunner</code>
* (usually this is needed if different listeners/reporters are needed).
* <b>Note</b>: you don't need to worry about the wiring listener, because it is added
* automatically.
*/
protected ITestRunnerFactory buildTestRunnerFactory()
{
//################### PATCH STARTS
if (System.getProperty("testrunfactory") != null) {
m_customTestRunnerFactory = (ITestRunnerFactory) ClassHelper.newInstance(
ClassHelper.fileToClass(
System.getProperty(
"testrunfactory"
)
)
);
//################## PATCH ENDS
} else if (null == m_customTestRunnerFactory) {
m_customTestRunnerFactory = new ITestRunnerFactory()
{
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest xmlTest,
List<IInvokedMethodListener> listeners
)
{
TestRunner runner =
new TestRunner(
getConfiguration(), suite, xmlTest,
false /*skipFailedInvocationCounts */,
listeners
);
if (m_useDefaultListeners) {
runner.addListener(new TestHTMLReporter());
runner.addListener(new JUnitXMLReporter());
}
return runner;
}
};
}
return m_customTestRunnerFactory;
}
private String getHost()
{
return m_host;
}
public void setHost(String host)
{
m_host = defaultIfStringEmpty(host, LOCALHOST);
}
private int getPort()
{
return m_port;
}
/**
* A ISuiteListener wiring the results using the internal string-based protocol.
*/
private static class RemoteSuiteListener implements ISuiteListener
{
private final MessageHub m_messageSender;
RemoteSuiteListener(MessageHub smsh)
{
m_messageSender = smsh;
}
@Override
public void onFinish(ISuite suite)
{
m_messageSender.sendMessage(new SuiteMessage(suite, false /*start*/));
}
@Override
public void onStart(ISuite suite)
{
m_messageSender.sendMessage(new SuiteMessage(suite, true /*start*/));
}
}
private static class DelegatingTestRunnerFactory implements ITestRunnerFactory
{
private final ITestRunnerFactory m_delegateFactory;
private final MessageHub m_messageSender;
DelegatingTestRunnerFactory(ITestRunnerFactory trf, MessageHub smsh)
{
m_delegateFactory = trf;
m_messageSender = smsh;
}
@Override
public TestRunner newTestRunner(
ISuite suite, XmlTest test,
List<IInvokedMethodListener> listeners
)
{
TestRunner tr = m_delegateFactory.newTestRunner(suite, test, listeners);
tr.addListener(new RemoteTestListener(suite, test, m_messageSender));
return tr;
}
}
}

View File

@ -0,0 +1,85 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.clients.OverlordResourceTestClient;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.apache.commons.io.IOUtils;
import org.joda.time.Interval;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.concurrent.Callable;
public abstract class AbstractIndexerTest
{
@Inject
protected CoordinatorResourceTestClient coordinator;
@Inject
protected OverlordResourceTestClient indexer;
@Inject
protected ObjectMapper jsonMapper;
@Inject
protected FromFileTestQueryHelper queryHelper;
protected void unloadAndKillData(final String dataSource) throws Exception
{
Interval interval = new Interval("2013-01-01T00:00:00.000Z/2013-12-01T00:00:00.000Z");
coordinator.unloadSegmentsForDataSource(dataSource, interval);
RetryUtil.retryUntilFalse(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(dataSource);
}
}, "Segment Unloading"
);
coordinator.deleteSegmentsDataSource(dataSource, interval);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return (indexer.getPendingTasks().size() + indexer.getRunningTasks().size() + indexer.getWaitingTasks()
.size()) == 0;
}
}, "Waiting for Tasks Completion"
);
}
protected String getTaskAsString(String file) throws IOException
{
InputStream inputStream = AbstractIndexerTest.class.getResourceAsStream(file);
StringWriter writer = new StringWriter();
IOUtils.copy(inputStream, writer, "UTF-8");
return writer.toString();
}
}

View File

@ -0,0 +1,81 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITIndexerTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITIndexerTest.class);
private static final String INDEX_TASK = "/indexer/wikipedia_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test";
@Inject
private IntegrationTestingConfig config;
@Test
public void testIndexData() throws Exception
{
loadData();
try {
queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
unloadAndKillData(INDEX_DATASOURCE);
}
}
private void loadData() throws Exception
{
final String taskID = indexer.submitTask(getTaskAsString(INDEX_TASK));
LOG.info("TaskID for loading index task %s", taskID);
indexer.waitUntilTaskCompletes(taskID);
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(INDEX_DATASOURCE);
}
}, "Segment Load"
);
}
}

View File

@ -0,0 +1,142 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import io.druid.curator.discovery.ServerDiscoveryFactory;
import io.druid.curator.discovery.ServerDiscoverySelector;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.clients.EventReceiverFirehoseTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import io.druid.testing.utils.ServerDiscoveryUtil;
import org.joda.time.DateTime;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
/**
* Steps:
* 1) Submit a RealtimeIndexTask
* 2) Load data using the EventReceiverFirehose
* 3) Run queries and verify that the ingested data is available
* 4) Wait for handover of the segment to a historical node
* 5) Query data from the historical node and verify the handover
* 6) Unload and delete the created data segment
*/
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITRealtimeIndexTaskTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITRealtimeIndexTaskTest.class);
private static final String REALTIME_TASK_RESOURCE = "/indexer/wikipedia_realtime_index_task.json";
private static final String EVENT_RECEIVER_SERVICE_NAME = "eventReceiverServiceName";
private static final String EVENT_DATA_FILE = "/indexer/wikipedia_index_data.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test";
@Inject
ServerDiscoveryFactory factory;
@Inject
@Global
HttpClient httpClient;
@Inject
IntegrationTestingConfig config;
@Test
public void testRealtimeIndexTask() throws Exception
{
try {
// the task will run for 2 minutes and then shut itself down
String task = setShutOffTime(
getTaskAsString(REALTIME_TASK_RESOURCE),
new DateTime(System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(2))
);
String taskID = indexer.submitTask(task);
postEvents();
// sleep for a while to let the events be ingested
TimeUnit.SECONDS.sleep(5);
// queries should be answered by the realtime task at this point
this.queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
// wait for the task to complete
indexer.waitUntilTaskCompletes(taskID);
// the task should complete only after its segments are loaded by historical nodes
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(INDEX_DATASOURCE);
}
},
true,
60000,
10,
"Real-time generated segments loaded"
);
// run queries on historical nodes
this.queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
unloadAndKillData(INDEX_DATASOURCE);
}
}
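// Replaces the #SHUTOFFTIME placeholder in the task spec with the absolute time at which the realtime task should shut itself down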
private String setShutOffTime(String taskAsString, DateTime time)
{
return taskAsString.replace("#SHUTOFFTIME", time.toString());
}
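// Discovers the event receiver firehose and posts the sample events to it via the docker-mapped middle manager host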
public void postEvents() throws Exception
{
final ServerDiscoverySelector eventReceiverSelector = factory.createSelector(EVENT_RECEIVER_SERVICE_NAME);
eventReceiverSelector.start();
try {
ServerDiscoveryUtil.waitUntilInstanceReady(eventReceiverSelector, "Event Receiver");
// Use the host and port mapped by the docker VM instead of the service announced in ZooKeeper
String host = config.getMiddleManagerHost() + ":" + eventReceiverSelector.pick().getPort();
LOG.info("Event receiver found at host [%s]", host);
EventReceiverFirehoseTestClient client = new EventReceiverFirehoseTestClient(
host,
EVENT_RECEIVER_SERVICE_NAME,
jsonMapper,
httpClient
);
client.postEventsFromFile(EVENT_DATA_FILE);
}
finally {
eventReceiverSelector.stop();
}
}
}

View File

@ -0,0 +1,172 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.indexer;
import com.beust.jcommander.internal.Lists;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import com.metamx.http.client.HttpClient;
import io.druid.curator.discovery.ServerDiscoveryFactory;
import io.druid.curator.discovery.ServerDiscoverySelector;
import io.druid.guice.annotations.Global;
import io.druid.testing.IntegrationTestingConfig;
import io.druid.testing.clients.EventReceiverFirehoseTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import io.druid.testing.utils.ServerDiscoveryUtil;
import org.joda.time.DateTime;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
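/**
 * Steps
 * 1) Submit 4 realtime index tasks, each writing to its own datasource and announcing its own event receiver service
 * 2) Post the same sample events to each task through the EventReceiverFirehose
 * 3) Run union queries across all 4 datasources against the realtime tasks
 * 4) Wait for the tasks to complete and for the segments to be loaded by historical nodes
 * 5) Re-run the union queries against the historical nodes
 * 6) Unload and delete the created data segments
 */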
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITUnionQueryTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITUnionQueryTest.class);
private static final String REALTIME_TASK_RESOURCE = "/indexer/wikipedia_realtime_index_task.json";
private static final String EVENT_RECEIVER_SERVICE_PREFIX = "eventReceiverServiceName";
private static final String UNION_DATA_FILE = "/indexer/wikipedia_index_data.json";
private static final String UNION_QUERIES_RESOURCE = "/indexer/union_queries.json";
private static final String UNION_DATASOURCE = "wikipedia_index_test";
@Inject
ServerDiscoveryFactory factory;
@Inject
@Global
HttpClient httpClient;
@Inject
IntegrationTestingConfig config;
@Test
public void testRealtimeIndexTask() throws Exception
{
final int numTasks = 4;
try {
// Load 4 datasources with the same dimensions
String task = setShutOffTime(
getTaskAsString(REALTIME_TASK_RESOURCE),
new DateTime(System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(3))
);
List<String> taskIDs = Lists.newArrayList();
for (int i = 0; i < numTasks; i++) {
taskIDs.add(
indexer.submitTask(
withServiceName(
withDataSource(task, UNION_DATASOURCE + i),
EVENT_RECEIVER_SERVICE_PREFIX + i
)
)
);
}
for (int i = 0; i < numTasks; i++) {
postEvents(i);
}
// sleep for a while to let the events be ingested
TimeUnit.SECONDS.sleep(5);
// union queries should be answered by the realtime tasks at this point
LOG.info("Running union queries...");
this.queryHelper.testQueriesFromFile(UNION_QUERIES_RESOURCE, 2);
// wait for the task to complete
for (int i = 0; i < numTasks; i++) {
indexer.waitUntilTaskCompletes(taskIDs.get(i));
}
// the tasks should complete only after their segments are loaded by historical nodes
for (int i = 0; i < numTasks; i++) {
final int taskNum = i;
RetryUtil.retryUntil(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinator.areSegmentsLoaded(UNION_DATASOURCE + taskNum);
}
},
true,
60000,
10,
"Real-time generated segments loaded"
);
}
// run queries on historical nodes
this.queryHelper.testQueriesFromFile(UNION_QUERIES_RESOURCE, 2);
}
catch (Exception e) {
e.printStackTrace();
throw Throwables.propagate(e);
}
finally {
for (int i = 0; i < numTasks; i++) {
unloadAndKillData(UNION_DATASOURCE + i);
}
}
}
private String setShutOffTime(String taskAsString, DateTime time)
{
return taskAsString.replace("#SHUTOFFTIME", time.toString());
}
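// Rewrites the datasource name in the task spec so that each task writes to its own datasource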
private String withDataSource(String taskAsString, String dataSource)
{
return taskAsString.replace(UNION_DATASOURCE, dataSource);
}
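// Rewrites the event receiver service name in the task spec so that each task announces a unique firehose endpoint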
private String withServiceName(String taskAsString, String serviceName)
{
return taskAsString.replace(EVENT_RECEIVER_SERVICE_PREFIX, serviceName);
}
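// Discovers the event receiver firehose for the given task and posts the sample events to it via the docker-mapped middle manager host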
public void postEvents(int id) throws Exception
{
final ServerDiscoverySelector eventReceiverSelector = factory.createSelector(EVENT_RECEIVER_SERVICE_PREFIX + id);
eventReceiverSelector.start();
try {
ServerDiscoveryUtil.waitUntilInstanceReady(eventReceiverSelector, "Event Receiver");
// Use the host and port mapped by the docker VM instead of the service announced in ZooKeeper
String host = config.getMiddleManagerHost() + ":" + eventReceiverSelector.pick().getPort();
LOG.info("Event receiver found at host [%s]", host);
EventReceiverFirehoseTestClient client = new EventReceiverFirehoseTestClient(
host,
EVENT_RECEIVER_SERVICE_PREFIX + id,
jsonMapper,
httpClient
);
client.postEventsFromFile(UNION_DATA_FILE);
}
finally {
eventReceiverSelector.stop();
}
}
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.query;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
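/**
 * Waits for the pre-loaded twitterstream segments to become available and then
 * runs the queries in the twitterstream queries resource, verifying the results.
 */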
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITTwitterQueryTest
{
private static final String TWITTER_DATA_SOURCE = "twitterstream";
private static final String TWITTER_QUERIES_RESOURCE = "/queries/twitterstream_queries.json";
@Inject
CoordinatorResourceTestClient coordinatorClient;
@Inject
private FromFileTestQueryHelper queryHelper;
@BeforeMethod
public void before()
{
// ensure that the twitter segments are completely loaded
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinatorClient.areSegmentsLoaded(TWITTER_DATA_SOURCE);
}
}, "twitter segment load"
);
}
@Test
public void testQueriesFromFile() throws Exception
{
queryHelper.testQueriesFromFile(TWITTER_QUERIES_RESOURCE, 2);
}
}

View File

@ -0,0 +1,65 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.tests.query;
import com.google.inject.Inject;
import io.druid.testing.clients.CoordinatorResourceTestClient;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.FromFileTestQueryHelper;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import java.util.concurrent.Callable;
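/**
 * Waits for the pre-loaded wikipedia_editstream segments to become available and then
 * runs the queries in the wikipedia_editstream queries resource, verifying the results.
 */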
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITWikipediaQueryTest
{
private static final String WIKIPEDIA_DATA_SOURCE = "wikipedia_editstream";
private static final String WIKIPEDIA_QUERIES_RESOURCE = "/queries/wikipedia_editstream_queries.json";
@Inject
private CoordinatorResourceTestClient coordinatorClient;
@Inject
private FromFileTestQueryHelper queryHelper;
@BeforeMethod
public void before()
{
// ensure that the wikipedia segments are completely loaded
RetryUtil.retryUntilTrue(
new Callable<Boolean>()
{
@Override
public Boolean call() throws Exception
{
return coordinatorClient.areSegmentsLoaded(WIKIPEDIA_DATA_SOURCE);
}
}, "wikipedia segment load"
);
}
@Test
public void testQueriesFromFile() throws Exception
{
queryHelper.testQueriesFromFile(WIKIPEDIA_QUERIES_RESOURCE, 2);
}
}

View File

@ -0,0 +1,19 @@
{
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": "wikipedia_index_test",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
}

View File

@ -0,0 +1,19 @@
{
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": "wikipedia_reindex_test",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
}

View File

@ -0,0 +1,564 @@
[
{
"description": "timeseries, filtered, all aggs, all",
"query": {
"queryType": "timeseries",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"added": 2064.0,
"count": 8,
"delta": 748.0,
"deleted": 1316.0,
"rows": 8
}
}
]
},
{
"description": "topN, all aggs, page dim, uniques metric",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"dimension": "page",
"metric": "added",
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"added": 3620.0,
"count": 4,
"page": "Crimson Typhoon",
"delta": 3600.0,
"deleted": 20.0,
"rows": 4
},
{
"added": 1836.0,
"count": 4,
"page": "Striker Eureka",
"delta": 1320.0,
"deleted": 516.0,
"rows": 4
},
{
"added": 492.0,
"count": 4,
"page": "Cherno Alpha",
"delta": 444.0,
"deleted": 48.0,
"rows": 4
}
]
}
]
},
{
"description": "topN, all aggs, page dim, count metric, postAggs",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "added"
},
{
"type": "doubleSum",
"fieldName": "deleted",
"name": "deleted"
},
{
"type": "doubleSum",
"fieldName": "delta",
"name": "delta"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfAddedDeletedConst",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "added",
"fieldName": "added"
},
{
"type": "arithmetic",
"name": "",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "constant",
"name": "constant",
"value": 1000
}
]
}
]
}
],
"dimension": "page",
"metric": "added",
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"added": 3620.0,
"count": 4,
"page": "Crimson Typhoon",
"delta": 3600.0,
"deleted": 20.0,
"sumOfAddedDeletedConst": 4640.0,
"rows": 4
},
{
"added": 1836.0,
"count": 4,
"page": "Striker Eureka",
"delta": 1320.0,
"deleted": 516.0,
"sumOfAddedDeletedConst": 3352.0,
"rows": 4
},
{
"added": 492.0,
"count": 4,
"page": "Cherno Alpha",
"delta": 444.0,
"deleted": 48.0,
"sumOfAddedDeletedConst": 1540.0,
"rows": 4
}
]
}
]
},
{
"description": "topN, lexicographic, two aggs, language dim, postAggs",
"query": {
"queryType": "topN",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimension": "language",
"metric": {
"type": "lexicographic",
"previousStop": "a"
},
"threshold": 3,
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": [
{
"sumOfRowsAndCount": 16.0,
"count": 8,
"language": "en",
"rows": 8
},
{
"sumOfRowsAndCount": 8.0,
"count": 4,
"language": "ja",
"rows": 4
},
{
"sumOfRowsAndCount": 8.0,
"count": 4,
"language": "ru",
"rows": 4
}
]
}
]
},
{
"description": "groupBy, two aggs, namespace dim, postAggs",
"query": {
"queryType": "groupBy",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimensions": ["namespace"],
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 16.0,
"count": 8,
"rows": 8,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 24.0,
"count": 12,
"rows": 12,
"namespace": "wikipedia"
}
}
]
},
{
"description": "groupBy, two aggs, namespace + robot dim, postAggs",
"query": {
"queryType": "groupBy",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"intervals": ["2013-08-31/2013-09-01"],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "longSum",
"fieldName": "count",
"name": "count"
}
],
"postAggregations": [
{
"type": "arithmetic",
"name": "sumOfRowsAndCount",
"fn": "+",
"fields": [
{
"type": "fieldAccess",
"name": "rows",
"fieldName": "rows"
},
{
"type": "fieldAccess",
"name": "count",
"fieldName": "count"
}
]
}
],
"dimensions": ["namespace", "robot"],
"limitSpec": {
"type": "default",
"limit": 3,
"orderBy": ["robot", "namespace"]
},
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 8.0,
"count": 4,
"robot": "false",
"rows": 4,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 8.0,
"count": 4,
"robot": "true",
"rows": 4,
"namespace": "article"
}
},
{
"version": "v1",
"timestamp": "2013-08-31T00:00:00.000Z",
"event": {
"sumOfRowsAndCount": 24.0,
"count": 12,
"robot": "true",
"rows": 12,
"namespace": "wikipedia"
}
}
]
},
{
"query": {
"queryType": "search",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
},
"granularity": "all",
"query": {
"type": "insensitive_contains",
"value": "ip"
},
"context": {
"useCache": "true",
"populateCache": "true",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T00:00:00.000Z",
"result": [
{
"dimension": "user",
"value": "triplets"
},
{
"dimension": "namespace",
"value": "wikipedia"
}
]
}
]
},
{
"description": "timeboundary, 1 agg, union",
"query": {
"queryType": "timeBoundary",
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test1", "wikipedia_index_test2", "wikipedia_index_test3",
"wikipedia_index_test0"
]
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"minTime": "2013-08-31T01:02:33.000Z",
"maxTime": "2013-08-31T12:41:27.000Z"
}
}
]
}
]

View File

@ -0,0 +1,121 @@
{
"query": {
"queryType": "select",
"intervals": ["2013-08-31/2013-09-01"],
"dataSource": {
"type": "union",
"dataSources": [
"wikipedia_index_test"
]
},
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "language",
"value": "en"
},
"pagingSpec": {
"threshold": 10
},
"context": {
"useCache": "false",
"populateCache": "false",
"timeout": 60000
}
},
"expectedResults": [
{
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"pagingIdentifiers": {
"wikipedia_index_test0_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z": 0,
"wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z": 0,
"wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z": 0,
"wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z": 0
},
"events": [
{
"segmentId": "wikipedia_index_test0_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T01:02:33.000Z",
"page": "Gypsy Danger",
"added": 57.0,
"deleted": 200.0
}
},
{
"segmentId": "wikipedia_index_test0_2013-08-31T0com.metamx.common.ISE: one or more twitter queries failed0:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:43.993Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test1_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.108Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test2_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.236Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
},
{
"segmentId": "wikipedia_index_test3_2013-08-31T00:00:00.000Z_2013-09-01T00:00:00.000Z_2014-05-01T15:27:44.374Z",
"offset": 0,
"event": {
"timestamp": "2013-08-31T03:32:45.000Z",
"page": "Striker Eureka",
"added": 459.0,
"deleted": 129.0
}
}
]
}
}
]
}

View File

@ -0,0 +1,5 @@
{"timestamp": "2013-08-31T01:02:33Z", "page": "Gypsy Danger", "language" : "en", "user" : "nuclear", "unpatrolled" : "true", "newPage" : "true", "robot": "false", "anonymous": "false", "namespace":"article", "continent":"North America", "country":"United States", "region":"Bay Area", "city":"San Francisco", "added": 57, "deleted": 200, "delta": -143}
{"timestamp": "2013-08-31T03:32:45Z", "page": "Striker Eureka", "language" : "en", "user" : "speed", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Australia", "country":"Australia", "region":"Cantebury", "city":"Syndey", "added": 459, "deleted": 129, "delta": 330}
{"timestamp": "2013-08-31T07:11:21Z", "page": "Cherno Alpha", "language" : "ru", "user" : "masterYi", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"article", "continent":"Asia", "country":"Russia", "region":"Oblast", "city":"Moscow", "added": 123, "deleted": 12, "delta": 111}
{"timestamp": "2013-08-31T11:58:39Z", "page": "Crimson Typhoon", "language" : "zh", "user" : "triplets", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"China", "region":"Shanxi", "city":"Taiyuan", "added": 905, "deleted": 5, "delta": 900}
{"timestamp": "2013-08-31T12:41:27Z", "page": "Coyote Tango", "language" : "ja", "user" : "stringer", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"Japan", "region":"Kanto", "city":"Tokyo", "added": 1, "deleted": 10, "delta": -9}

View File

@ -0,0 +1,16 @@
[
{
"description": "timeseries, 1 agg, all",
"query":{
"queryType" : "timeBoundary",
"dataSource": "wikipedia_index_test"
},
"expectedResults":[ {
"timestamp" : "2013-08-31T01:02:33.000Z",
"result" : {
"minTime" : "2013-08-31T01:02:33.000Z",
"maxTime" : "2013-08-31T12:41:27.000Z"
}
} ]
}
]

View File

@ -0,0 +1,59 @@
{
"type": "index",
"spec": {
"dataSchema": {
"dataSource": "wikipedia_index_test",
"metricsSpec": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-01" ]
},
"parser": {
"parseSpec": {
"format" : "json",
"timestampSpec": {
"column": "timestamp"
},
"dimensionsSpec": {
"dimensions": [
"page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous",
"namespace", "continent", "country", "region", "city"
]
}
}
}
},
"ioConfig": {
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/indexer",
"filter": "wikipedia_index_data.json"
}
},
"tuningConfig": {
"type": "index"
}
}
}

Some files were not shown because too many files have changed in this diff.