mirror of https://github.com/apache/druid.git
commit 161c2f0fff: Merge branch 'master' into druid-0.7.x

Conflicts:
    cassandra-storage/pom.xml
    common/pom.xml
    examples/config/historical/runtime.properties
    examples/config/overlord/runtime.properties
    examples/config/realtime/runtime.properties
    examples/pom.xml
    hdfs-storage/pom.xml
    histogram/pom.xml
    indexing-hadoop/pom.xml
    indexing-service/pom.xml
    kafka-eight/pom.xml
    kafka-seven/pom.xml
    pom.xml
    processing/pom.xml
    rabbitmq/pom.xml
    s3-extensions/pom.xml
    server/pom.xml
    services/pom.xml
@@ -106,6 +106,7 @@ This module contains query processing functionality.
 |`druid.processing.buffer.sizeBytes`|This specifies a buffer size for the storage of intermediate results. The computation engine in both the Historical and Realtime nodes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data, while smaller values can require more passes, depending on the query being executed.|1073741824 (1GB)|
 |`druid.processing.formatString`|Realtime and Historical nodes use this format string to name their processing threads.|processing-%s|
 |`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)|
+|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache, which is disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators that operate on dimension values, such as the JavaScript and cardinality aggregators, but it can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)|


 ### Metrics Module
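For context on the row being added here, a minimal sketch of how these processing properties might sit together in a node's runtime.properties; the sizes below are illustrative assumptions (e.g. an 8-core box), not recommended values:

```
# Hypothetical example values, for illustration only.
druid.processing.buffer.sizeBytes=1073741824
druid.processing.formatString=processing-%s
druid.processing.numThreads=7
# Any value greater than 0 enables the dimension value lookup cache;
# the default of 0 leaves it disabled.
druid.processing.columnCache.sizeBytes=10485760
```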
@@ -19,13 +19,13 @@ Clone Druid and build it:
 git clone https://github.com/metamx/druid.git druid
 cd druid
 git fetch --tags
-git checkout druid-0.6.143
+git checkout druid-0.6.144
 ./build.sh
 ```

 ### Downloading the DSK (Druid Standalone Kit)

-[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.143-bin.tar.gz) a stand-alone tarball and run it:
+[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.144-bin.tar.gz) a stand-alone tarball and run it:

 ``` bash
 tar -xzf druid-services-0.X.X-bin.tar.gz
@@ -8,9 +8,9 @@ The previous examples are for Kafka 7. To support Kafka 8, a couple changes need

 - Update realtime node's configs for Kafka 8 extensions
   - e.g.
-    - `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.143",...]`
+    - `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.144",...]`
   - becomes
-    - `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.143",...]`
+    - `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.144",...]`
 - Update realtime task config for changed keys
   - `firehose.type`, `plumber.rejectionPolicyFactory`, and all of `firehose.consumerProps` change.

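The swap described in the bullets above amounts to replacing one extension coordinate in the realtime node's runtime.properties. A minimal before/after sketch; any other coordinates in the list stay as they are:

```
# Kafka 7 (before)
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.144"]
# Kafka 8 (after)
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-eight:0.6.144"]
```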
@@ -57,7 +57,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/overlord

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.144"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
@@ -139,7 +139,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/middlemanager

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.143","io.druid.extensions:druid-kafka-seven:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.144","io.druid.extensions:druid-kafka-seven:0.6.144"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
@@ -286,7 +286,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/historical

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.144"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
@@ -27,7 +27,7 @@ druid.host=localhost
 druid.service=realtime
 druid.port=8083

-druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.144"]


 druid.zk.service.host=localhost
@@ -76,7 +76,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/realtime

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.143","io.druid.extensions:druid-kafka-seven:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.144","io.druid.extensions:druid-kafka-seven:0.6.144"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
@@ -28,7 +28,7 @@ Configuration:

 -Ddruid.zk.service.host=localhost

--Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.143"]
+-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.144"]

 -Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
 -Ddruid.db.connector.user=druid
@@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 ### Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.143-bin.tar.gz). Download this file to a directory of your choosing.
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.144-bin.tar.gz). Download this file to a directory of your choosing.

 You can extract the awesomeness within by issuing:

@@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
 Not too lost so far, right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.143
+cd druid-services-0.6.144
 ```

 You should see a bunch of files:
@@ -91,7 +91,7 @@ druid.service=overlord

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.144"]

 druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
 druid.db.connector.user=druid
@@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes and external depende

 If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.

-You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.143-bin.tar.gz)
+You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.144-bin.tar.gz)

 and untar the contents within by issuing:

@@ -149,7 +149,7 @@ druid.port=8081

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.144"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@@ -240,7 +240,7 @@ druid.port=8083

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.143","io.druid.extensions:druid-kafka-seven:0.6.143"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.144","io.druid.extensions:druid-kafka-seven:0.6.144"]

 # Change this config to db to hand off to the rest of the Druid cluster
 druid.publish.type=noop
@@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 h3. Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.143-bin.tar.gz)
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.144-bin.tar.gz)
 Download this file to a directory of your choosing.
 You can extract the awesomeness within by issuing:

@@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
 Not too lost so far, right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.143
+cd druid-services-0.6.144
 ```

 You should see a bunch of files:
@@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.

 # Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.143-bin.tar.gz).
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.144-bin.tar.gz).
 Download this bad boy to a directory of your choosing.

 You can extract the awesomeness within by issuing:
@@ -1,5 +1,5 @@
 # Extensions
-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.138","io.druid.extensions:druid-kafka-seven:0.6.138","io.druid.extensions:druid-rabbitmq:0.6.138", "io.druid.extensions:druid-s3-extensions:0.6.138"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.7.0","io.druid.extensions:druid-kafka-seven:0.7.0","io.druid.extensions:druid-rabbitmq:0.7.0", "io.druid.extensions:druid-s3-extensions:0.7.0"]

 # Zookeeper
 druid.zk.service.host=localhost
@@ -20,4 +20,4 @@ druid.selectors.indexing.serviceName=overlord
 # druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]

 # Metrics logging (disabled for examples)
-druid.emitter=noop
+druid.emitter=noop
@@ -2,7 +2,7 @@ druid.host=localhost
 druid.service=historical
 druid.port=8081

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.7.0"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@@ -14,4 +14,4 @@ druid.server.maxSize=10000000000
 druid.processing.buffer.sizeBytes=100000000
 druid.processing.numThreads=1

-druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 10000000000}]
+druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 10000000000}]
@@ -5,4 +5,4 @@ druid.service=overlord
 druid.indexer.queue.startDelay=PT0M
 druid.indexer.runner.javaOpts="-server -Xmx256m"
 druid.indexer.fork.property.druid.processing.numThreads=1
-druid.indexer.fork.property.druid.computation.buffer.size=100000000
+druid.indexer.fork.property.druid.computation.buffer.size=100000000
@@ -9,4 +9,4 @@ druid.processing.buffer.sizeBytes=100000000
 druid.processing.numThreads=1

 # Enable Real monitoring
-# druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor","io.druid.segment.realtime.RealtimeMetricsMonitor"]
+# druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor","io.druid.segment.realtime.RealtimeMetricsMonitor"]
@@ -42,6 +42,6 @@ public abstract class DruidProcessingConfig extends ExecutorServiceConfig implem
   @Config(value = "${base_path}.columnCache.sizeBytes")
   public int columnCacheSizeBytes()
   {
-    return 1024 * 1024;
+    return 0;
   }
 }
@@ -22,6 +22,7 @@ package io.druid.query;
 import com.google.common.base.Function;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
+import com.metamx.common.ISE;
 import com.metamx.common.guava.Sequence;
 import com.metamx.common.guava.Sequences;
 import io.druid.query.aggregation.MetricManipulationFn;
@@ -75,9 +76,14 @@ public class FinalizeResultsQueryRunner<T> implements QueryRunner<T>

   @Override
   @SuppressWarnings("unchecked")
-  public T apply(@Nullable T input)
+  public T apply(T input)
   {
     Result<BySegmentResultValueClass<T>> result = (Result<BySegmentResultValueClass<T>>) input;
+
+    if (input == null) {
+      throw new ISE("Cannot have a null result!");
+    }
+
     BySegmentResultValueClass<T> resultsClass = result.getValue();

     return (T) new Result<BySegmentResultValueClass>(
@@ -24,7 +24,9 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 import com.google.common.primitives.Ints;
 import com.metamx.common.IAE;
+import com.metamx.common.Pair;
 import com.metamx.common.guava.CloseQuietly;
+import com.metamx.common.logger.Logger;

 import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
@@ -51,6 +53,8 @@ import java.util.Map;
  */
 public class GenericIndexed<T> implements Indexed<T>, Closeable
 {
+  private static final Logger log = new Logger(GenericIndexed.class);
+
   private static final byte version = 0x1;

   public static final int INITIAL_CACHE_CAPACITY = 16384;
@@ -121,11 +125,10 @@ public class GenericIndexed<T> implements Indexed<T>, Closeable
     return new GenericIndexed<T>(theBuffer.asReadOnlyBuffer(), strategy, allowReverseLookup);
   }

-  private static class SizedLRUMap<K, V> extends LinkedHashMap<K, V>
+  private static class SizedLRUMap<K, V> extends LinkedHashMap<K, Pair<Integer, V>>
   {
-    final Map<K, Integer> sizes = Maps.newHashMap();
-    int numBytes = 0;
-    int maxBytes = 0;
+    private final int maxBytes;
+    private int numBytes = 0;

     public SizedLRUMap(int initialCapacity, int maxBytes)
     {
@@ -134,20 +137,26 @@ public class GenericIndexed<T> implements Indexed<T>, Closeable
     }

     @Override
-    protected boolean removeEldestEntry(Map.Entry<K, V> eldest)
+    protected boolean removeEldestEntry(Map.Entry<K, Pair<Integer, V>> eldest)
     {
       if (numBytes > maxBytes) {
-        numBytes -= sizes.remove(eldest.getKey());
+        numBytes -= eldest.getValue().lhs;
         return true;
       }
       return false;
     }

-    public V put(K key, V value, int size)
+    public void put(K key, V value, int size)
     {
-      numBytes += size;
-      sizes.put(key, size);
-      return super.put(key, value);
+      final int totalSize = size + 48; // add approximate object overhead
+      numBytes += totalSize;
+      super.put(key, new Pair<>(totalSize, value));
     }
+
+    public V getValue(Object key)
+    {
+      final Pair<Integer, V> sizeValuePair = super.get(key);
+      return sizeValuePair == null ? null : sizeValuePair.rhs;
+    }
   }

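The SizedLRUMap above leans on LinkedHashMap's eviction hook to keep a running byte budget: each entry stores its own size, and `removeEldestEntry` drops the least recently used entry while over budget. Below is a self-contained sketch of the same pattern; the class name `ByteBoundedLruCache` and the small `Sized` holder are hypothetical stand-ins (the diff uses the metamx `Pair` class instead), and it assumes access-order iteration via the three-argument LinkedHashMap constructor gives the LRU behavior:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of a byte-bounded LRU cache built on LinkedHashMap, mirroring the
// SizedLRUMap pattern in the diff. Callers report each value's size in bytes.
public class ByteBoundedLruCache<K, V> {
  // Each map value carries (approximate size in bytes, cached value),
  // playing the role of Pair<Integer, V> in the diff.
  private static final class Sized<V> {
    final int bytes;
    final V value;
    Sized(int bytes, V value) { this.bytes = bytes; this.value = value; }
  }

  private final int maxBytes;
  private int numBytes = 0;
  private final LinkedHashMap<K, Sized<V>> map;

  public ByteBoundedLruCache(int initialCapacity, int maxBytes) {
    this.maxBytes = maxBytes;
    // accessOrder = true moves entries to the tail on get(), so the head
    // ("eldest") entry is always the least recently used one.
    this.map = new LinkedHashMap<K, Sized<V>>(initialCapacity, 0.75f, true) {
      @Override
      protected boolean removeEldestEntry(Map.Entry<K, Sized<V>> eldest) {
        // Called once per insertion: evict the LRU entry while over budget.
        if (numBytes > ByteBoundedLruCache.this.maxBytes) {
          numBytes -= eldest.getValue().bytes;
          return true;
        }
        return false;
      }
    };
  }

  public void put(K key, V value, int sizeBytes) {
    final int totalSize = sizeBytes + 48; // rough per-entry object overhead
    numBytes += totalSize;
    map.put(key, new Sized<>(totalSize, value));
  }

  public V get(K key) {
    final Sized<V> sized = map.get(key);
    return sized == null ? null : sized.value;
  }
}
```

Note that, as in the diff, `removeEldestEntry` is consulted once per insertion and removes at most one entry, so `numBytes` can transiently overshoot `maxBytes` after a large put and is worked back down by subsequent insertions.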
@@ -206,6 +215,7 @@ public class GenericIndexed<T> implements Indexed<T>, Closeable
       @Override
       protected SizedLRUMap<Integer, T> initialValue()
       {
+        log.debug("Allocating column cache of max size[%d]", maxBytes);
         return new SizedLRUMap<>(INITIAL_CACHE_CAPACITY, maxBytes);
       }
     };
@@ -236,7 +246,7 @@ public class GenericIndexed<T> implements Indexed<T>, Closeable
     }

     if(cacheable) {
-      final T cached = cachedValues.get().get(index);
+      final T cached = cachedValues.get().getValue(index);
       if (cached != null) {
         return cached;
       }
@@ -329,6 +339,7 @@ public class GenericIndexed<T> implements Indexed<T>, Closeable
   public void close() throws IOException
   {
     if(cacheable) {
+      log.debug("Closing column cache");
       cachedValues.get().clear();
       cachedValues.remove();
     }
@@ -40,4 +40,4 @@
             <scope>test</scope>
         </dependency>
     </dependencies>
-</project>
+</project>