diff --git a/build.sh b/build.sh
index 261bfc0835a..9b6148b6c4c 100755
--- a/build.sh
+++ b/build.sh
@@ -30,4 +30,4 @@ echo "For examples, see: "
echo " "
ls -1 examples/*/*sh
echo " "
-echo "See also http://druid.io/docs/0.6.24"
+echo "See also http://druid.io/docs/0.6.26"
diff --git a/cassandra-storage/pom.xml b/cassandra-storage/pom.xml
index b9190260274..c821f9aa69b 100644
--- a/cassandra-storage/pom.xml
+++ b/cassandra-storage/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/common/pom.xml b/common/pom.xml
index 74172822ce2..24c37411ba3 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/docs/content/Batch-ingestion.md b/docs/content/Batch-ingestion.md
index 309d4d3ea32..2f3ee9821d7 100644
--- a/docs/content/Batch-ingestion.md
+++ b/docs/content/Batch-ingestion.md
@@ -202,7 +202,7 @@ The schema of the Hadoop Index Task contains a task "type" and a Hadoop Index Co
"type" : "static",
"paths" : "data.json"
},
- "targetPartitionSize" : 5000000,
+ "targetPartitionSi:qze" : 5000000,
"rollupSpec" : {
"aggs": [{
"type" : "count",
diff --git a/docs/content/Booting-a-production-cluster.md b/docs/content/Booting-a-production-cluster.md
index 6877c901134..61755b5733d 100644
--- a/docs/content/Booting-a-production-cluster.md
+++ b/docs/content/Booting-a-production-cluster.md
@@ -3,7 +3,7 @@ layout: doc_page
---
# Booting a Single Node Cluster #
-[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.24-bin.tar.gz).
+[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.26-bin.tar.gz).
The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables:
diff --git a/docs/content/DimensionSpecs.md b/docs/content/DimensionSpecs.md
new file mode 100644
index 00000000000..bb1dda63221
--- /dev/null
+++ b/docs/content/DimensionSpecs.md
@@ -0,0 +1,76 @@
+---
+layout: doc_page
+---
+
+## DimensionSpec
+
+`DimensionSpec`s define how dimension values get transformed prior to aggregation.
+
+### DefaultDimensionSpec
+
+Returns dimension values as is and optionally renames the dimension.
+
+```json
+{ "type" : "default", "dimension" : , "outputName": }
+```
+
+### ExtractionDimensionSpec
+
+Returns dimension values transformed using the given [DimExtractionFn](#toc_3).
+
+```json
+{
+ "type" : "extraction",
+ "dimension" : ,
+ "outputName" : ,
+ "dimExtractionFn" :
+}
+```
+
+## DimExtractionFn
+
+`DimExtractionFn`s define the transformation applied to each dimension value.
+
+### RegexDimExtractionFn
+
+Returns the first group matched by the given regular expression. If there is no match it returns the dimension value as is.
+
+```json
+{ "type" : "regex", "expr", }
+```
+
+### PartialDimExtractionFn
+
+Returns the dimension value as is if there is a match, otherwise returns null.
+
+```json
+{ "type" : "partial", "expr", }
+```
+
+### SearchQuerySpecDimExtractionFn
+
+Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null.
+
+```json
+{ "type" : "searchQuery", "query" : }
+```
+
+### TimeDimExtractionFn
+
+Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format.
+
+```json
+{ "type" : "time", "timeFormat" : , "resultFormat" : }
+```
+
+### JavascriptDimExtractionFn
+
+Returns the dimension value as transformed by the given JavaScript function.
+
+Example
+
+```json
+{
+ "type" : "javascript",
+ "function" : "function(str) { return str.substr(0, 3); }"
+}
+```
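
The `javascript` type shown above maps onto the new `@JsonSubTypes` entry added to `DimExtractionFn.java` later in this diff. As a rough illustration of that round trip (not part of the patch), here is a minimal Java sketch; it assumes a plain Jackson `ObjectMapper` is close enough to the mapper Druid actually injects.

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.query.extraction.DimExtractionFn;

public class JavascriptDimExtractionExample
{
  public static void main(String[] args) throws Exception
  {
    // The "javascript" example from this page; the new @JsonSubTypes entry on the
    // DimExtractionFn interface routes "type" : "javascript" to JavascriptDimExtractionFn.
    String spec = "{ \"type\" : \"javascript\", "
                  + "\"function\" : \"function(str) { return str.substr(0, 3); }\" }";

    // Assumption: a default ObjectMapper is sufficient for this polymorphic lookup;
    // Druid's injected mapper may be configured differently.
    ObjectMapper mapper = new ObjectMapper();
    DimExtractionFn fn = mapper.readValue(spec, DimExtractionFn.class);

    System.out.println(fn.apply("Stockholm")); // prints "Sto"
  }
}
```

This is the same behaviour the new `JavascriptDimExtractionFnTest` further down asserts directly against the class.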
diff --git a/docs/content/Examples.md b/docs/content/Examples.md
index dd377e82cdb..81549a7ec2d 100644
--- a/docs/content/Examples.md
+++ b/docs/content/Examples.md
@@ -19,13 +19,13 @@ Clone Druid and build it:
git clone https://github.com/metamx/druid.git druid
cd druid
git fetch --tags
-git checkout druid-0.6.24
+git checkout druid-0.6.26
./build.sh
```
### Downloading the DSK (Druid Standalone Kit)
-[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) a stand-alone tarball and run it:
+[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz) a stand-alone tarball and run it:
``` bash
tar -xzf druid-services-0.X.X-bin.tar.gz
diff --git a/docs/content/Historical.md b/docs/content/Historical.md
index 32180ddaf96..b5fb1ebcfcf 100644
--- a/docs/content/Historical.md
+++ b/docs/content/Historical.md
@@ -32,7 +32,6 @@ druid.server.maxSize=100000000
druid.processing.buffer.sizeBytes=10000000
-druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]```
```
diff --git a/docs/content/Realtime.md b/docs/content/Realtime.md
index 048a8fdc22c..3d6c432add1 100644
--- a/docs/content/Realtime.md
+++ b/docs/content/Realtime.md
@@ -27,7 +27,7 @@ druid.host=localhost
druid.service=realtime
druid.port=8083
-druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.24"]
+druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.26"]
druid.zk.service.host=localhost
diff --git a/docs/content/SearchQuery.md b/docs/content/SearchQuery.md
index 6cee0918e91..2cfc726d60b 100644
--- a/docs/content/SearchQuery.md
+++ b/docs/content/SearchQuery.md
@@ -3,26 +3,27 @@ layout: doc_page
---
A search query returns dimension values that match the search specification.
- {
- "queryType": "search",
- "dataSource": "sample_datasource",
- "granularity": "day",
- "searchDimensions": [
- "dim1",
- "dim2"
- ],
- "query": {
- "type": "insensitive_contains",
- "value": "Ke"
- },
- "sort" : {
- "type": "lexicographic"
- },
- "intervals": [
- "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
- ]
- }
-
+```json
+{
+ "queryType": "search",
+ "dataSource": "sample_datasource",
+ "granularity": "day",
+ "searchDimensions": [
+ "dim1",
+ "dim2"
+ ],
+ "query": {
+ "type": "insensitive_contains",
+ "value": "Ke"
+ },
+ "sort" : {
+ "type": "lexicographic"
+ },
+ "intervals": [
+ "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000"
+ ]
+}
+```
There are several main parts to a search query:
@@ -40,32 +41,33 @@ There are several main parts to a search query:
The format of the result is:
- [
+```json
+[
+ {
+ "timestamp": "2012-01-01T00:00:00.000Z",
+ "result": [
{
- "timestamp": "2012-01-01T00:00:00.000Z",
- "result": [
- {
- "dimension": "dim1",
- "value": "Ke$ha"
- },
- {
- "dimension": "dim2",
- "value": "Ke$haForPresident"
- }
- ]
+ "dimension": "dim1",
+ "value": "Ke$ha"
},
{
- "timestamp": "2012-01-02T00:00:00.000Z",
- "result": [
- {
- "dimension": "dim1",
- "value": "SomethingThatContainsKe"
- },
- {
- "dimension": "dim2",
- "value": "SomethingElseThatContainsKe"
- }
- ]
+ "dimension": "dim2",
+ "value": "Ke$haForPresident"
}
]
-
+ },
+ {
+ "timestamp": "2012-01-02T00:00:00.000Z",
+ "result": [
+ {
+ "dimension": "dim1",
+ "value": "SomethingThatContainsKe"
+ },
+ {
+ "dimension": "dim2",
+ "value": "SomethingElseThatContainsKe"
+ }
+ ]
+ }
+]
+```
diff --git a/docs/content/Tasks.md b/docs/content/Tasks.md
index 83326d842b6..5ad56b55d06 100644
--- a/docs/content/Tasks.md
+++ b/docs/content/Tasks.md
@@ -78,6 +78,18 @@ The Hadoop Index Task is used to index larger data sets that require the paralle
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
+##### Using your own Hadoop distribution
+
+Druid is compiled against Apache hadoop-core 1.0.3. If you use a different flavor of Hadoop that is API-compatible with hadoop-core 1.0.3, you should only have to change the hadoopCoordinates property to point at the Maven artifact used by your distribution.
+
+##### Resolving dependency conflicts running HadoopIndexTask
+
+Currently, the HadoopIndexTask creates a single classpath to run the HadoopDruidIndexerJob, which can lead to version conflicts between various dependencies of Druid, extension modules, and Hadoop's own dependencies.
+
+The Hadoop index task puts Druid's dependencies first on the classpath, followed by any extension dependencies, with Hadoop's own dependencies last.
+
+If an extension misbehaves inside a HadoopIndexTask, it may be because Druid, or one of its dependencies, pulls in a different version of a library than the one your extension uses, and Druid's version wins on the classpath. In that case you probably want to build your own Druid version and override the offending library by adding an explicit dependency to the pom.xml of each Druid sub-module that depends on it.
+
#### Realtime Index Task
The indexing service can also run real-time tasks. These tasks effectively transform a middle manager into a real-time node. We introduced real-time tasks as a way to programmatically add new real-time data sources without needing to manually add nodes. The grammar for the real-time task is as follows:
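
A generic trick (not something this patch adds) for pinning down the kind of conflict described in the new Tasks.md section above is to log which jar a suspect class was actually loaded from inside the task's JVM. A minimal sketch; the class name below is only a placeholder for whichever library you suspect:

```java
import java.net.URL;
import java.security.CodeSource;

public class WhichJar
{
  public static void main(String[] args) throws Exception
  {
    // Placeholder: substitute whichever class you suspect is resolved from the wrong jar.
    Class<?> clazz = Class.forName("com.fasterxml.jackson.databind.ObjectMapper");

    CodeSource source = clazz.getProtectionDomain().getCodeSource();
    URL location = (source == null) ? null : source.getLocation();

    // Prints the jar (or class directory) that won on the classpath; null for bootstrap classes.
    System.out.println(clazz.getName() + " loaded from " + location);
  }
}
```

If the printed location is a Druid or extension jar other than the one you expected, the pom.xml override described above is the likely fix.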
diff --git a/docs/content/Tutorial:-A-First-Look-at-Druid.md b/docs/content/Tutorial:-A-First-Look-at-Druid.md
index a8b85aa1b80..548d573c6f1 100644
--- a/docs/content/Tutorial:-A-First-Look-at-Druid.md
+++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md
@@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
### Download a Tarball
-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz). Download this file to a directory of your choosing.
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz). Download this file to a directory of your choosing.
You can extract the awesomeness within by issuing:
@@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
Not too lost so far right? That's great! If you cd into the directory:
```
-cd druid-services-0.6.24
+cd druid-services-0.6.26
```
You should see a bunch of files:
diff --git a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
index 51539b5dcdc..60d5487784d 100644
--- a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
+++ b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md
@@ -44,7 +44,7 @@ With real-world data, we recommend having a message bus such as [Apache Kafka](h
#### Setting up Kafka
-[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.24/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
+[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.26/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html).
diff --git a/docs/content/Tutorial:-The-Druid-Cluster.md b/docs/content/Tutorial:-The-Druid-Cluster.md
index 39b5043a25c..e954c3de257 100644
--- a/docs/content/Tutorial:-The-Druid-Cluster.md
+++ b/docs/content/Tutorial:-The-Druid-Cluster.md
@@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes as well as and exter
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
-You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)
+You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz)
and untar the contents within by issuing:
@@ -149,7 +149,7 @@ druid.port=8081
druid.zk.service.host=localhost
-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"]
# Dummy read only AWS account (used to download example data)
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@@ -159,7 +159,6 @@ druid.server.maxSize=100000000
druid.processing.buffer.sizeBytes=10000000
-druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
```
@@ -239,7 +238,7 @@ druid.port=8083
druid.zk.service.host=localhost
-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26"]
# Change this config to db to hand off to the rest of the Druid cluster
druid.publish.type=noop
diff --git a/docs/content/Tutorial:-Webstream.md b/docs/content/Tutorial:-Webstream.md
index 300928684f1..c8b83d1e00c 100644
--- a/docs/content/Tutorial:-Webstream.md
+++ b/docs/content/Tutorial:-Webstream.md
@@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
h3. Download a Tarball
-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz)
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz)
Download this file to a directory of your choosing.
You can extract the awesomeness within by issuing:
@@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
Not too lost so far right? That's great! If you cd into the directory:
```
-cd druid-services-0.6.24
+cd druid-services-0.6.26
```
You should see a bunch of files:
diff --git a/docs/content/Twitter-Tutorial.textile b/docs/content/Twitter-Tutorial.textile
index edbc5c38b77..9e368b13f10 100644
--- a/docs/content/Twitter-Tutorial.textile
+++ b/docs/content/Twitter-Tutorial.textile
@@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.
h3. Download a Tarball
-We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz.
+We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.26-bin.tar.gz.
Download this bad boy to a directory of your choosing.
You can extract the awesomeness within by issuing:
diff --git a/docs/content/toc.textile b/docs/content/toc.textile
index 82b58c03b95..9f212df47e6 100644
--- a/docs/content/toc.textile
+++ b/docs/content/toc.textile
@@ -36,11 +36,12 @@ h2. Querying
** "Aggregations":./Aggregations.html
** "Post Aggregations":./Post-aggregations.html
** "Granularities":./Granularities.html
+** "DimensionSpecs":./DimensionSpecs.html
* Query Types
** "GroupByQuery":./GroupByQuery.html
*** "OrderBy":./OrderBy.html
*** "Having":./Having.html
-** "SearchQuery":./Having.html
+** "SearchQuery":./SearchQuery.html
*** "SearchQuerySpec":./SearchQuerySpec.html
** "SegmentMetadataQuery":./SegmentMetadataQuery.html
** "TimeBoundaryQuery":./TimeBoundaryQuery.html
diff --git a/examples/bin/examples/rabbitmq/rabbitmq_realtime.spec b/examples/bin/examples/rabbitmq/rabbitmq_realtime.spec
index fc26f21ee3a..528e81f39cc 100644
--- a/examples/bin/examples/rabbitmq/rabbitmq_realtime.spec
+++ b/examples/bin/examples/rabbitmq/rabbitmq_realtime.spec
@@ -26,7 +26,11 @@
"routingKey": "#",
"durable": "true",
"exclusive": "false",
- "autoDelete": "false"
+ "autoDelete": "false",
+
+ "maxRetries": "10",
+ "retryIntervalSeconds": "1",
+ "maxDurationSeconds": "300"
},
"parser" : {
"timestampSpec" : { "column" : "utcdt", "format" : "iso" },
diff --git a/examples/config/historical/runtime.properties b/examples/config/historical/runtime.properties
index 15ca0750ca1..67cc44f74d7 100644
--- a/examples/config/historical/runtime.properties
+++ b/examples/config/historical/runtime.properties
@@ -4,7 +4,7 @@ druid.port=8081
druid.zk.service.host=localhost
-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.26"]
# Dummy read only AWS account (used to download example data)
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@@ -16,5 +16,4 @@ druid.server.maxSize=100000000
druid.processing.buffer.sizeBytes=10000000
druid.processing.numThreads=1
-druid.segmentCache.infoPath=/tmp/druid/segmentInfoCache
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 100000000}]
\ No newline at end of file
diff --git a/examples/config/realtime/runtime.properties b/examples/config/realtime/runtime.properties
index 0eb7ef4dd3d..aefabeda473 100644
--- a/examples/config/realtime/runtime.properties
+++ b/examples/config/realtime/runtime.properties
@@ -4,7 +4,7 @@ druid.port=8083
druid.zk.service.host=localhost
-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.26","io.druid.extensions:druid-kafka-seven:0.6.26","io.druid.extensions:druid-rabbitmq:0.6.26"]
# Change this config to db to hand off to the rest of the Druid cluster
druid.publish.type=noop
diff --git a/examples/pom.xml b/examples/pom.xml
index b6094213411..30ee4e6f586 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/hdfs-storage/pom.xml b/hdfs-storage/pom.xml
index 8dbb858663c..71da35eff72 100644
--- a/hdfs-storage/pom.xml
+++ b/hdfs-storage/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml
index 5a7ff2cfe57..3dfb59c2824 100644
--- a/indexing-hadoop/pom.xml
+++ b/indexing-hadoop/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml
index 26983285c9e..9b718e7e172 100644
--- a/indexing-service/pom.xml
+++ b/indexing-service/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/kafka-eight/pom.xml b/kafka-eight/pom.xml
index ac3f89eda65..c92048c47b3 100644
--- a/kafka-eight/pom.xml
+++ b/kafka-eight/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/kafka-seven/pom.xml b/kafka-seven/pom.xml
index 9fd061e853f..cd5ef152864 100644
--- a/kafka-seven/pom.xml
+++ b/kafka-seven/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/pom.xml b/pom.xml
index 7dac175bc12..e0dff689ffa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
    <groupId>io.druid</groupId>
    <artifactId>druid</artifactId>
    <packaging>pom</packaging>
-    <version>0.6.25-SNAPSHOT</version>
+    <version>0.6.27-SNAPSHOT</version>
    <name>druid</name>
    <description>druid</description>
@@ -57,6 +57,7 @@
        <module>s3-extensions</module>
        <module>kafka-seven</module>
        <module>kafka-eight</module>
+        <module>rabbitmq</module>
@@ -368,11 +369,6 @@
            <artifactId>aether-api</artifactId>
            <version>0.9.0.M2</version>
        </dependency>
-        <dependency>
-            <groupId>com.rabbitmq</groupId>
-            <artifactId>amqp-client</artifactId>
-            <version>3.1.1</version>
-        </dependency>
        <dependency>
            <groupId>net.java.dev.jets3t</groupId>
            <artifactId>jets3t</artifactId>
diff --git a/processing/pom.xml b/processing/pom.xml
index 64b64dbb898..4c40c9f5a50 100644
--- a/processing/pom.xml
+++ b/processing/pom.xml
@@ -28,7 +28,7 @@
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.25-SNAPSHOT</version>
+        <version>0.6.27-SNAPSHOT</version>
diff --git a/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java b/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java
index d2973783b9c..fef4b26f0f2 100644
--- a/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java
+++ b/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java
@@ -57,7 +57,7 @@ public class JavaScriptPostAggregator implements PostAggregator
final ScriptableObject scope = context.initStandardObjects();
- final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "aggregate", 1, null);
+ final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null);
Context.exit();
diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
index 65b5c2a5d7b..0509c92714a 100644
--- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
+++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
@@ -29,7 +29,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
@JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
- @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class)
+ @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
+ @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
})
public interface DimExtractionFn
{
diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java
new file mode 100644
index 00000000000..1878df5479e
--- /dev/null
+++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java
@@ -0,0 +1,94 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Function;
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.ContextFactory;
+import org.mozilla.javascript.ScriptableObject;
+
+import java.nio.ByteBuffer;
+
+public class JavascriptDimExtractionFn implements DimExtractionFn
+{
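+  // Compiles the user-supplied JavaScript once with Rhino and wraps it in a Guava Function
+  // that is then applied to each dimension value.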
+  private static Function<String, String> compile(String function) {
+ final ContextFactory contextFactory = ContextFactory.getGlobal();
+ final Context context = contextFactory.enterContext();
+ context.setOptimizationLevel(9);
+
+ final ScriptableObject scope = context.initStandardObjects();
+
+ final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null);
+ Context.exit();
+
+
+    return new Function<String, String>()
+ {
+ public String apply(String input)
+ {
+ // ideally we need a close() function to discard the context once it is not used anymore
+ Context cx = Context.getCurrentContext();
+ if (cx == null) {
+ cx = contextFactory.enterContext();
+ }
+
+ return Context.toString(fn.call(cx, scope, scope, new String[]{input}));
+ }
+ };
+ }
+
+ private static final byte CACHE_TYPE_ID = 0x4;
+
+ private final String function;
+  private final Function<String, String> fn;
+
+ @JsonCreator
+ public JavascriptDimExtractionFn(
+ @JsonProperty("function") String function
+ )
+ {
+ this.function = function;
+ this.fn = compile(function);
+ }
+
+ @JsonProperty
+ public String getFunction()
+ {
+ return function;
+ }
+
+ @Override
+ public byte[] getCacheKey()
+ {
+ byte[] bytes = function.getBytes();
+ return ByteBuffer.allocate(1 + bytes.length)
+ .put(CACHE_TYPE_ID)
+ .put(bytes)
+ .array();
+ }
+
+ @Override
+ public String apply(String dimValue)
+ {
+ return fn.apply(dimValue);
+ }
+}
diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java
new file mode 100644
index 00000000000..cc5a1b26b4e
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java
@@ -0,0 +1,273 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+package io.druid.query.extraction.extraction;
+
+import com.google.common.collect.Iterators;
+import io.druid.query.extraction.DimExtractionFn;
+import io.druid.query.extraction.JavascriptDimExtractionFn;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Iterator;
+
+public class JavascriptDimExtractionFnTest
+{
+ private static final String[] testStrings = {
+ "Quito",
+ "Calgary",
+ "Tokyo",
+ "Stockholm",
+ "Vancouver",
+ "Pretoria",
+ "Wellington",
+ "Ontario"
+ };
+
+ @Test
+ public void testJavascriptSubstring()
+ {
+ String function = "function(str) { return str.substring(0,3); }";
+ DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+ for (String str : testStrings) {
+ String res = dimExtractionFn.apply(str);
+ Assert.assertEquals(str.substring(0, 3), res);
+ }
+ }
+
+ @Test
+ public void testJavascriptRegex()
+ {
+ String function = "function(str) { return str.replace(/[aeiou]/g, ''); }";
+ DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+    Iterator<String> it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr");
+ for (String str : testStrings) {
+ String res = dimExtractionFn.apply(str);
+ Assert.assertEquals(it.next(), res);
+ }
+ }
+
+ @Test
+ public void testJavaScriptPorterStemmer()
+ {
+ // JavaScript porter stemmer adapted from
+ // https://github.com/kristopolous/Porter-Stemmer/blob/e990a8d456510571d1ef9ef923d2a30a94679e13/PorterStemmer1980.js
+ String function = "function(w) {"
+ + "var step2list = {\n"
+ + " \"ational\" : \"ate\",\n"
+ + " \"tional\" : \"tion\",\n"
+ + " \"enci\" : \"ence\",\n"
+ + " \"anci\" : \"ance\",\n"
+ + " \"izer\" : \"ize\",\n"
+ + " \"bli\" : \"ble\",\n"
+ + " \"alli\" : \"al\",\n"
+ + " \"entli\" : \"ent\",\n"
+ + " \"eli\" : \"e\",\n"
+ + " \"ousli\" : \"ous\",\n"
+ + " \"ization\" : \"ize\",\n"
+ + " \"ation\" : \"ate\",\n"
+ + " \"ator\" : \"ate\",\n"
+ + " \"alism\" : \"al\",\n"
+ + " \"iveness\" : \"ive\",\n"
+ + " \"fulness\" : \"ful\",\n"
+ + " \"ousness\" : \"ous\",\n"
+ + " \"aliti\" : \"al\",\n"
+ + " \"iviti\" : \"ive\",\n"
+ + " \"biliti\" : \"ble\",\n"
+ + " \"logi\" : \"log\"\n"
+ + " },\n"
+ + "\n"
+ + " step3list = {\n"
+ + " \"icate\" : \"ic\",\n"
+ + " \"ative\" : \"\",\n"
+ + " \"alize\" : \"al\",\n"
+ + " \"iciti\" : \"ic\",\n"
+ + " \"ical\" : \"ic\",\n"
+ + " \"ful\" : \"\",\n"
+ + " \"ness\" : \"\"\n"
+ + " },\n"
+ + "\n"
+ + " c = \"[^aeiou]\", // consonant\n"
+ + " v = \"[aeiouy]\", // vowel\n"
+ + " C = c + \"[^aeiouy]*\", // consonant sequence\n"
+ + " V = v + \"[aeiou]*\", // vowel sequence\n"
+ + "\n"
+ + " mgr0 = \"^(\" + C + \")?\" + V + C, // [C]VC... is m>0\n"
+ + " meq1 = \"^(\" + C + \")?\" + V + C + \"(\" + V + \")?$\", // [C]VC[V] is m=1\n"
+ + " mgr1 = \"^(\" + C + \")?\" + V + C + V + C, // [C]VCVC... is m>1\n"
+ + " s_v = \"^(\" + C + \")?\" + v; "
+ + ""
+ + "var\n"
+ + " stem,\n"
+ + " suffix,\n"
+ + " firstch,\n"
+ + " re,\n"
+ + " re2,\n"
+ + " re3,\n"
+ + " re4,\n"
+ + " debugFunction,\n"
+ + " origword = w;\n"
+ + "\n"
+ + "\n"
+ + " if (w.length < 3) { return w; }\n"
+ + "\n"
+ + " firstch = w.substr(0,1);\n"
+ + " if (firstch == \"y\") {\n"
+ + " w = firstch.toUpperCase() + w.substr(1);\n"
+ + " }\n"
+ + "\n"
+ + " // Step 1a\n"
+ + " re = /^(.+?)(ss|i)es$/;\n"
+ + " re2 = /^(.+?)([^s])s$/;\n"
+ + "\n"
+ + " if (re.test(w)) { \n"
+ + " w = w.replace(re,\"$1$2\"); \n"
+ + "\n"
+ + " } else if (re2.test(w)) {\n"
+ + " w = w.replace(re2,\"$1$2\"); \n"
+ + " }\n"
+ + "\n"
+ + " // Step 1b\n"
+ + " re = /^(.+?)eed$/;\n"
+ + " re2 = /^(.+?)(ed|ing)$/;\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " re = new RegExp(mgr0);\n"
+ + " if (re.test(fp[1])) {\n"
+ + " re = /.$/;\n"
+ + " w = w.replace(re,\"\");\n"
+ + " }\n"
+ + " } else if (re2.test(w)) {\n"
+ + " var fp = re2.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " re2 = new RegExp(s_v);\n"
+ + " if (re2.test(stem)) {\n"
+ + " w = stem;\n"
+ + "\n"
+ + " re2 = /(at|bl|iz)$/;\n"
+ + " re3 = new RegExp(\"([^aeiouylsz])\\\\1$\");\n"
+ + " re4 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
+ + "\n"
+ + " if (re2.test(w)) { \n"
+ + " w = w + \"e\"; \n"
+ + "\n"
+ + " } else if (re3.test(w)) { \n"
+ + " re = /.$/; \n"
+ + " w = w.replace(re,\"\"); \n"
+ + "\n"
+ + " } else if (re4.test(w)) { \n"
+ + " w = w + \"e\"; \n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "\n"
+ + " // Step 1c\n"
+ + " re = new RegExp(\"^(.*\" + v + \".*)y$\");\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " w = stem + \"i\";\n"
+ + " }\n"
+ + "\n"
+ + " // Step 2\n"
+ + " re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " suffix = fp[2];\n"
+ + " re = new RegExp(mgr0);\n"
+ + " if (re.test(stem)) {\n"
+ + " w = stem + step2list[suffix];\n"
+ + " }\n"
+ + " }\n"
+ + "\n"
+ + " // Step 3\n"
+ + " re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " suffix = fp[2];\n"
+ + " re = new RegExp(mgr0);\n"
+ + " if (re.test(stem)) {\n"
+ + " w = stem + step3list[suffix];\n"
+ + " }\n"
+ + " }\n"
+ + "\n"
+ + " // Step 4\n"
+ + " re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n"
+ + " re2 = /^(.+?)(s|t)(ion)$/;\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " re = new RegExp(mgr1);\n"
+ + " if (re.test(stem)) {\n"
+ + " w = stem;\n"
+ + " }\n"
+ + " } else if (re2.test(w)) {\n"
+ + " var fp = re2.exec(w);\n"
+ + " stem = fp[1] + fp[2];\n"
+ + " re2 = new RegExp(mgr1);\n"
+ + " if (re2.test(stem)) {\n"
+ + " w = stem;\n"
+ + " }\n"
+ + " }\n"
+ + "\n"
+ + " // Step 5\n"
+ + " re = /^(.+?)e$/;\n"
+ + " if (re.test(w)) {\n"
+ + " var fp = re.exec(w);\n"
+ + " stem = fp[1];\n"
+ + " re = new RegExp(mgr1);\n"
+ + " re2 = new RegExp(meq1);\n"
+ + " re3 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n"
+ + " if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {\n"
+ + " w = stem;\n"
+ + " }\n"
+ + " }\n"
+ + "\n"
+ + " re = /ll$/;\n"
+ + " re2 = new RegExp(mgr1);\n"
+ + " if (re.test(w) && re2.test(w)) {\n"
+ + " re = /.$/;\n"
+ + " w = w.replace(re,\"\");\n"
+ + " }\n"
+ + "\n"
+ + " // and turn initial Y back to y\n"
+ + " if (firstch == \"y\") {\n"
+ + " w = firstch.toLowerCase() + w.substr(1);\n"
+ + " }\n"
+ + "\n"
+ + "\n"
+ + " return w;"
+ + ""
+ + "}";
+
+ DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function);
+
+    Iterator<String> inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets");
+    Iterator<String> it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset");
+
+ while(inputs.hasNext()) {
+ String res = dimExtractionFn.apply(inputs.next());
+ Assert.assertEquals(it.next(), res);
+ }
+ }
+}
diff --git a/publications/whitepaper/README.md b/publications/whitepaper/README.md
new file mode 100644
index 00000000000..44025ab1dbf
--- /dev/null
+++ b/publications/whitepaper/README.md
@@ -0,0 +1,4 @@
+Download [MacTeX](http://tug.org/mactex/)
+```bash
+make
+```
diff --git a/publications/whitepaper/acm_proc_article-sp.cls b/publications/whitepaper/acm_proc_article-sp.cls
new file mode 100644
index 00000000000..e908c0443c3
--- /dev/null
+++ b/publications/whitepaper/acm_proc_article-sp.cls
@@ -0,0 +1,1656 @@
+% ACM_PROC_ARTICLE-SP.CLS - VERSION 3.2SP
+% COMPATIBLE WITH THE "SIG-ALTERNATE" V2.4
+% Gerald Murray - April 22nd. 2009
+%
+% ---- Start of 'updates' ----
+%
+% April 22nd. 2009 - Fixed 'Natbib' incompatibility problem - Gerry
+% April 22nd. 2009 - Fixed 'Babel' incompatibility problem - Gerry
+% April 22nd. 2009 - Inserted various bug-fixes and improvements - Gerry
+%
+% To produce Type 1 fonts in the document plus allow for 'normal LaTeX accenting' in the critical areas;
+% title, author block, section-heads, etc. etc.
+% i.e. the whole purpose of this version update is to NOT resort to 'inelegant accent patches'.
+% After much research, three extra .sty packages were added to the the tail (ae, aecompl, aeguill) to solve,
+% in particular, the accenting problem(s). We _could_ ask authors (via instructions/sample file) to 'include' these in
+% the source .tex file - in the preamble - but if everything is already provided ('behind the scenes' - embedded IN the .cls)
+% then this is less work for authors and also makes everything appear 'vanilla'.
+% NOTE: all 'patchwork accenting" has been commented out (here) and is no longer 'used' in the sample .tex file (either).
+% Gerry June 2007
+%
+% Rule widths changed to .5, author count (>6) fixed, roll-back for Type 3 problem. Gerry March 20th. 2007
+% Changes made to 'modernize' the fontnames but esp. for MikTeX users V2.4/2.5 - Nov. 30th. 2006
+% Updated the \email definition to allow for its use inside of 'shared affiliations' - Nov. 30th. 2006
+% Fixed the 'section number depth value' - Nov. 30th. 2006
+%
+% Footnotes inside table cells using \minipage (Oct. 2002)
+% Georgia fixed bug in sub-sub-section numbering in paragraphs (July 29th. 2002)
+% JS/GM fix to vertical spacing before Proofs (July 30th. 2002)
+%
+% Allowance made to switch default fonts between those systems using
+% normal/modern font names and those using 'Type 1' or 'Truetype' fonts.
+% See LINE NUMBER 269 for details.
+% Also provided for enumerated/annotated Corollaries 'surrounded' by
+% enumerated Theorems (line 844).
+% Gerry November 11th. 1999
+%
+% This 'sp' version does NOT produce the permission block.
+%
+% Major change in January 2000 was to include a "blank line" in between
+% new paragraphs. This involved major changes to the, then, acmproc-sp.cls 1.0SP
+% file, precipitating a 'new' name: "acm_proc_article-sp.cls" V2.01SP.
+%
+% ---- End of 'updates' ----
+%
+\def\fileversion{V3.2SP} % for ACM's tracking purposes
+\def\filedate{April 22, 2009} % Gerry Murray's tracking data
+\def\docdate {Wednesday 22nd. April 2009} % Gerry Murray (with deltas to doc}
+\usepackage{epsfig}
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+% Need this for accents in Arial/Helvetica
+%\usepackage[T1]{fontenc} % Gerry March 12, 2007 - causes Type 3 problems (body text)
+%\usepackage{textcomp}
+%
+% ACM_PROC_ARTICLE-SP DOCUMENT STYLE
+% G.K.M. Tobin August-October 1999
+% adapted from ARTICLE document style by Ken Traub, Olin Shivers
+% also using elements of esub2acm.cls
+% LATEST REVISION V3.2SP - APRIL 2009
+% ARTICLE DOCUMENT STYLE -- Released 16 March 1988
+% for LaTeX version 2.09
+% Copyright (C) 1988 by Leslie Lamport
+%
+%
+%%% ACM_PROC_ARTICLE-SP is a document style for producing two-column camera-ready pages for
+%%% ACM conferences, according to ACM specifications. The main features of
+%%% this style are:
+%%%
+%%% 1) Two columns.
+%%% 2) Side and top margins of 4.5pc, bottom margin of 6pc, column gutter of
+%%% 2pc, hence columns are 20pc wide and 55.5pc tall. (6pc =3D 1in, approx)
+%%% 3) First page has title information, and an extra 6pc of space at the
+%%% bottom of the first column for the ACM copyright notice.
+%%% 4) Text is 9pt on 10pt baselines; titles (except main) are 9pt bold.
+%%%
+%%%
+%%% There are a few restrictions you must observe:
+%%%
+%%% 1) You cannot change the font size; ACM wants you to use 9pt.
+%%% 3) You must start your paper with the \maketitle command. Prior to the
+%%% \maketitle you must have \title and \author commands. If you have a
+%%% \date command it will be ignored; no date appears on the paper, since
+%%% the proceedings will have a date on the front cover.
+%%% 4) Marginal paragraphs, tables of contents, lists of figures and tables,
+%%% and page headings are all forbidden.
+%%% 5) The `figure' environment will produce a figure one column wide; if you
+%%% want one that is two columns wide, use `figure*'.
+%%%
+%
+%%% Copyright Space:
+%%% This style automatically leaves 1" blank space at the bottom of page 1/
+%%% column 1. This space can optionally be filled with some text using the
+%%% \toappear{...} command. If used, this command must be BEFORE the \maketitle
+%%% command. If this command is defined AND [preprint] is on, then the
+%%% space is filled with the {...} text (at the bottom); otherwise, it is
+%%% blank. If you use \toappearbox{...} instead of \toappear{...} then a
+%%% box will be drawn around the text (if [preprint] is on).
+%%%
+%%% A typical usage looks like this:
+%%% \toappear{To appear in the Ninth AES Conference on Medievil Lithuanian
+%%% Embalming Technique, June 1991, Alfaretta, Georgia.}
+%%% This will be included in the preprint, and left out of the conference
+%%% version.
+%%%
+%%% WARNING:
+%%% Some dvi-ps converters heuristically allow chars to drift from their
+%%% true positions a few pixels. This may be noticeable with the 9pt sans-serif
+%%% bold font used for section headers.
+%%% You may turn this hackery off via the -e option:
+%%% dvips -e 0 foo.dvi >foo.ps
+%%%
+\typeout{Document Class 'acm_proc_article-sp' <22nd. April '09>. Modified by G.K.M. Tobin}
+\typeout{Based in part upon document Style `acmconf' <22 May 89>. Hacked 4/91 by}
+\typeout{shivers@cs.cmu.edu, 4/93 by theobald@cs.mcgill.ca}
+\typeout{Excerpts were taken from (Journal Style) 'esub2acm.cls'.}
+\typeout{****** Bugs/comments/suggestions to Gerry Murray -- murray@hq.acm.org ******}
+
+\oddsidemargin 4.5pc
+\evensidemargin 4.5pc
+\advance\oddsidemargin by -1in % Correct for LaTeX gratuitousness
+\advance\evensidemargin by -1in % Correct for LaTeX gratuitousness
+\marginparwidth 0pt % Margin pars are not allowed.
+\marginparsep 11pt % Horizontal space between outer margin and
+ % marginal note
+
+ % Top of page:
+\topmargin 4.5pc % Nominal distance from top of page to top of
+ % box containing running head.
+\advance\topmargin by -1in % Correct for LaTeX gratuitousness
+\headheight 0pt % Height of box containing running head.
+\headsep 0pt % Space between running head and text.
+ % Bottom of page:
+\footskip 30pt % Distance from baseline of box containing foot
+ % to baseline of last line of text.
+\@ifundefined{footheight}{\newdimen\footheight}{}% this is for LaTeX2e
+\footheight 12pt % Height of box containing running foot.
+
+
+%% Must redefine the top margin so there's room for headers and
+%% page numbers if you are using the preprint option. Footers
+%% are OK as is. Olin.
+\advance\topmargin by -37pt % Leave 37pt above text for headers
+\headheight 12pt % Height of box containing running head.
+\headsep 25pt % Space between running head and text.
+
+\textheight 666pt % 9 1/4 column height
+\textwidth 42pc % Width of text line.
+ % For two-column mode:
+\columnsep 2pc % Space between columns
+\columnseprule 0pt % Width of rule between columns.
+\hfuzz 1pt % Allow some variation in column width, otherwise it's
+ % too hard to typeset in narrow columns.
+
+\footnotesep 5.6pt % Height of strut placed at the beginning of every
+ % footnote =3D height of normal \footnotesize strut,
+ % so no extra space between footnotes.
+
+\skip\footins 8.1pt plus 4pt minus 2pt % Space between last line of text and
+ % top of first footnote.
+\floatsep 11pt plus 2pt minus 2pt % Space between adjacent floats moved
+ % to top or bottom of text page.
+\textfloatsep 18pt plus 2pt minus 4pt % Space between main text and floats
+ % at top or bottom of page.
+\intextsep 11pt plus 2pt minus 2pt % Space between in-text figures and
+ % text.
+\@ifundefined{@maxsep}{\newdimen\@maxsep}{}% this is for LaTeX2e
+\@maxsep 18pt % The maximum of \floatsep,
+ % \textfloatsep and \intextsep (minus
+ % the stretch and shrink).
+\dblfloatsep 11pt plus 2pt minus 2pt % Same as \floatsep for double-column
+ % figures in two-column mode.
+\dbltextfloatsep 18pt plus 2pt minus 4pt% \textfloatsep for double-column
+ % floats.
+\@ifundefined{@dblmaxsep}{\newdimen\@dblmaxsep}{}% this is for LaTeX2e
+\@dblmaxsep 18pt % The maximum of \dblfloatsep and
+ % \dbltexfloatsep.
+\@fptop 0pt plus 1fil % Stretch at top of float page/column. (Must be
+ % 0pt plus ...)
+\@fpsep 8pt plus 2fil % Space between floats on float page/column.
+\@fpbot 0pt plus 1fil % Stretch at bottom of float page/column. (Must be
+ % 0pt plus ... )
+\@dblfptop 0pt plus 1fil % Stretch at top of float page. (Must be 0pt plus ...)
+\@dblfpsep 8pt plus 2fil % Space between floats on float page.
+\@dblfpbot 0pt plus 1fil % Stretch at bottom of float page. (Must be
+ % 0pt plus ... )
+\marginparpush 5pt % Minimum vertical separation between two marginal
+ % notes.
+
+\parskip 0pt % Extra vertical space between paragraphs.
+ % Set to 0pt outside sections, to keep section heads
+ % uniformly spaced. The value of parskip is set
+ % to leading value _within_ sections.
+ % 12 Jan 2000 gkmt
+\parindent 0pt % Width of paragraph indentation.
+\partopsep 2pt plus 1pt minus 1pt% Extra vertical space, in addition to
+ % \parskip and \topsep, added when user
+ % leaves blank line before environment.
+
+\@lowpenalty 51 % Produced by \nopagebreak[1] or \nolinebreak[1]
+\@medpenalty 151 % Produced by \nopagebreak[2] or \nolinebreak[2]
+\@highpenalty 301 % Produced by \nopagebreak[3] or \nolinebreak[3]
+
+\@beginparpenalty -\@lowpenalty % Before a list or paragraph environment.
+\@endparpenalty -\@lowpenalty % After a list or paragraph environment.
+\@itempenalty -\@lowpenalty % Between list items.
+
+%\@namedef{ds@10pt}{\@latexerr{The `10pt' option is not allowed in the `acmconf'
+\@namedef{ds@10pt}{\ClassError{The `10pt' option is not allowed in the `acmconf' % January 2008
+ document style.}\@eha}
+%\@namedef{ds@11pt}{\@latexerr{The `11pt' option is not allowed in the `acmconf'
+\@namedef{ds@11pt}{\ClassError{The `11pt' option is not allowed in the `acmconf' % January 2008
+ document style.}\@eha}
+%\@namedef{ds@12pt}{\@latexerr{The `12pt' option is not allowed in the `acmconf'
+\@namedef{ds@12pt}{\ClassError{The `12pt' option is not allowed in the `acmconf' % January 2008
+ document style.}\@eha}
+
+\@options
+
+\lineskip 2pt % \lineskip is 1pt for all font sizes.
+\normallineskip 2pt
+\def\baselinestretch{1}
+
+\abovedisplayskip 9pt plus2pt minus4.5pt%
+\belowdisplayskip \abovedisplayskip
+\abovedisplayshortskip \z@ plus3pt%
+\belowdisplayshortskip 5.4pt plus3pt minus3pt%
+\let\@listi\@listI % Setting of \@listi added 9 Jun 87
+
+\def\small{\@setsize\small{9pt}\viiipt\@viiipt
+\abovedisplayskip 7.6pt plus 3pt minus 4pt%
+\belowdisplayskip \abovedisplayskip
+\abovedisplayshortskip \z@ plus2pt%
+\belowdisplayshortskip 3.6pt plus2pt minus 2pt
+\def\@listi{\leftmargin\leftmargini %% Added 22 Dec 87
+\topsep 4pt plus 2pt minus 2pt\parsep 2pt plus 1pt minus 1pt
+\itemsep \parsep}}
+
+\def\footnotesize{\@setsize\footnotesize{9pt}\ixpt\@ixpt
+\abovedisplayskip 6.4pt plus 2pt minus 4pt%
+\belowdisplayskip \abovedisplayskip
+\abovedisplayshortskip \z@ plus 1pt%
+\belowdisplayshortskip 2.7pt plus 1pt minus 2pt
+\def\@listi{\leftmargin\leftmargini %% Added 22 Dec 87
+\topsep 3pt plus 1pt minus 1pt\parsep 2pt plus 1pt minus 1pt
+\itemsep \parsep}}
+
+\newcount\aucount
+\newcount\originalaucount
+\newdimen\auwidth
+\auwidth 130pc
+\newdimen\auskip
+\newcount\auskipcount
+\newdimen\auskip
+\global\auskip=1pc
+\newdimen\allauboxes
+\allauboxes=\auwidth
+\newtoks\addauthors
+\newcount\addauflag
+\global\addauflag=0 %Haven't shown additional authors yet
+
+\newtoks\subtitletext
+\gdef\subtitle#1{\subtitletext={#1}}
+
+\gdef\additionalauthors#1{\addauthors={#1}}
+
+\gdef\numberofauthors#1{\global\aucount=#1
+\ifnum\aucount>3\global\originalaucount=\aucount \global\aucount=3\fi %g} % 3 OK - Gerry March 2007
+\global\auskipcount=\aucount\global\advance\auskipcount by 1
+\global\multiply\auskipcount by 2
+\global\multiply\auskip by \auskipcount
+\global\advance\auwidth by -\auskip
+\global\divide\auwidth by \aucount}
+
+% \and was modified to count the number of authors. GKMT 12 Aug 1999
+\def\alignauthor{% % \begin{tabular}
+\end{tabular}%
+ \begin{tabular}[t]{p{\auwidth}}\centering}%
+
+
+% *** NOTE *** NOTE *** NOTE *** NOTE ***
+% If you have 'font problems' then you may need
+% to change these, e.g. 'arialb' instead of "arialbd".
+% Gerry Murray 11/11/1999
+% *** OR ** comment out block A and activate block B or vice versa.
+% **********************************************
+%
+% -- Start of block A -- (Type 1 or Truetype fonts)
+%\newfont{\secfnt}{timesbd at 12pt} % was timenrb originally - now is timesbd
+%\newfont{\secit}{timesbi at 12pt} %13 Jan 00 gkmt
+%\newfont{\subsecfnt}{timesi at 11pt} % was timenrri originally - now is timesi
+%\newfont{\subsecit}{timesbi at 11pt} % 13 Jan 00 gkmt -- was times changed to timesbi gm 2/4/2000
+% % because "normal" is italic, "italic" is Roman
+%\newfont{\ttlfnt}{arialbd at 18pt} % was arialb originally - now is arialbd
+%\newfont{\ttlit}{arialbi at 18pt} % 13 Jan 00 gkmt
+%\newfont{\subttlfnt}{arial at 14pt} % was arialr originally - now is arial
+%\newfont{\subttlit}{ariali at 14pt} % 13 Jan 00 gkmt
+%\newfont{\subttlbf}{arialbd at 14pt} % 13 Jan 00 gkmt
+%\newfont{\aufnt}{arial at 12pt} % was arialr originally - now is arial
+%\newfont{\auit}{ariali at 12pt} % 13 Jan 00 gkmt
+%\newfont{\affaddr}{arial at 10pt} % was arialr originally - now is arial
+%\newfont{\affaddrit}{ariali at 10pt} %13 Jan 00 gkmt
+%\newfont{\eaddfnt}{arial at 12pt} % was arialr originally - now is arial
+%\newfont{\ixpt}{times at 9pt} % was timenrr originally - now is times
+%\newfont{\confname}{timesi at 8pt} % was timenrri - now is timesi
+%\newfont{\crnotice}{times at 8pt} % was timenrr originally - now is times
+%\newfont{\ninept}{times at 9pt} % was timenrr originally - now is times
+% *********************************************
+% -- End of block A --
+%
+%
+% -- Start of block B -- UPDATED FONT NAMES
+% *********************************************
+% Gerry Murray 11/30/2006
+% *********************************************
+\newfont{\secfnt}{ptmb8t at 12pt}
+\newfont{\secit}{ptmbi8t at 12pt} %13 Jan 00 gkmt
+\newfont{\subsecfnt}{ptmri8t at 11pt}
+\newfont{\subsecit}{ptmbi8t at 11pt} %
+\newfont{\ttlfnt}{phvb8t at 18pt}
+\newfont{\ttlit}{phvbo8t at 18pt} % GM 2/4/2000
+\newfont{\subttlfnt}{phvr8t at 14pt}
+\newfont{\subttlit}{phvro8t at 14pt} % GM 2/4/2000
+\newfont{\subttlbf}{phvb8t at 14pt} % 13 Jan 00 gkmt
+\newfont{\aufnt}{phvr8t at 12pt}
+\newfont{\auit}{phvro8t at 12pt} % GM 2/4/2000
+\newfont{\affaddr}{phvr8t at 10pt}
+\newfont{\affaddrit}{phvro8t at 10pt} % GM 2/4/2000
+\newfont{\eaddfnt}{phvr8t at 12pt}
+\newfont{\ixpt}{ptmr8t at 9pt}
+\newfont{\confname}{ptmri8t at 8pt}
+\newfont{\crnotice}{ptmr8t at 8pt}
+\newfont{\ninept}{ptmr8t at 9pt}
+% +++++++++++++++++++++++++++++++++++++++++++++
+% -- End of block B --
+
+%\def\email#1{{{\eaddfnt{\vskip 4pt#1}}}}
+% If we have an email, inside a "shared affiliation" then we need the following instead
+\def\email#1{{{\eaddfnt{\par #1}}}} % revised - GM - 11/30/2006
+
+\def\addauthorsection{\ifnum\originalaucount>6 % was 3 - Gerry March 2007
+ \section{Additional Authors}\the\addauthors
+ \fi}
+
+\newcount\savesection
+\newcount\sectioncntr
+\global\sectioncntr=1
+
+\setcounter{secnumdepth}{3}
+
+\def\appendix{\par
+\section*{APPENDIX}
+\setcounter{section}{0}
+ \setcounter{subsection}{0}
+ \def\thesection{\Alph{section}} }
+
+
+\leftmargini 22.5pt
+\leftmarginii 19.8pt % > \labelsep + width of '(m)'
+\leftmarginiii 16.8pt % > \labelsep + width of 'vii.'
+\leftmarginiv 15.3pt % > \labelsep + width of 'M.'
+\leftmarginv 9pt
+\leftmarginvi 9pt
+
+\leftmargin\leftmargini
+\labelsep 4.5pt
+\labelwidth\leftmargini\advance\labelwidth-\labelsep
+
+\def\@listI{\leftmargin\leftmargini \parsep 3.6pt plus 2pt minus 1pt%
+\topsep 7.2pt plus 2pt minus 4pt%
+\itemsep 3.6pt plus 2pt minus 1pt}
+
+\let\@listi\@listI
+\@listi
+
+\def\@listii{\leftmargin\leftmarginii
+ \labelwidth\leftmarginii\advance\labelwidth-\labelsep
+ \topsep 3.6pt plus 2pt minus 1pt
+ \parsep 1.8pt plus 0.9pt minus 0.9pt
+ \itemsep \parsep}
+
+\def\@listiii{\leftmargin\leftmarginiii
+ \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
+ \topsep 1.8pt plus 0.9pt minus 0.9pt
+ \parsep \z@ \partopsep 1pt plus 0pt minus 1pt
+ \itemsep \topsep}
+
+\def\@listiv{\leftmargin\leftmarginiv
+ \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
+
+\def\@listv{\leftmargin\leftmarginv
+ \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
+
+\def\@listvi{\leftmargin\leftmarginvi
+ \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
+
+\def\labelenumi{\theenumi.}
+\def\theenumi{\arabic{enumi}}
+
+\def\labelenumii{(\theenumii)}
+\def\theenumii{\alph{enumii}}
+\def\p@enumii{\theenumi}
+
+\def\labelenumiii{\theenumiii.}
+\def\theenumiii{\roman{enumiii}}
+\def\p@enumiii{\theenumi(\theenumii)}
+
+\def\labelenumiv{\theenumiv.}
+\def\theenumiv{\Alph{enumiv}}
+\def\p@enumiv{\p@enumiii\theenumiii}
+
+\def\labelitemi{$\bullet$}
+\def\labelitemii{\bf --}
+\def\labelitemiii{$\ast$}
+\def\labelitemiv{$\cdot$}
+
+\def\verse{\let\\=\@centercr
+ \list{}{\itemsep\z@ \itemindent -1.5em\listparindent \itemindent
+ \rightmargin\leftmargin\advance\leftmargin 1.5em}\item[]}
+\let\endverse\endlist
+
+\def\quotation{\list{}{\listparindent 1.5em
+ \itemindent\listparindent
+ \rightmargin\leftmargin \parsep 0pt plus 1pt}\item[]}
+\let\endquotation=\endlist
+
+\def\quote{\list{}{\rightmargin\leftmargin}\item[]}
+\let\endquote=\endlist
+
+\def\descriptionlabel#1{\hspace\labelsep \bf #1}
+\def\description{\list{}{\labelwidth\z@ \itemindent-\leftmargin
+ \let\makelabel\descriptionlabel}}
+
+\let\enddescription\endlist
+
+\def\theequation{\arabic{equation}}
+
+\arraycolsep 4.5pt % Half the space between columns in an array environment.
+\tabcolsep 5.4pt % Half the space between columns in a tabular environment.
+\arrayrulewidth .5pt % Width of rules in array and tabular environment. % (was .4) updated Gerry March 20 2007
+\doublerulesep 1.8pt % Space between adjacent rules in array or tabular env.
+
+\tabbingsep \labelsep % Space used by the \' command. (See LaTeX manual.)
+
+\skip\@mpfootins =\skip\footins
+
+\fboxsep =2.7pt % Space left between box and text by \fbox and \framebox.
+\fboxrule =.5pt % Width of rules in box made by \fbox and \framebox. % (was .4) updated Gerry March 20 2007
+
+\def\thepart{\Roman{part}} % Roman numeral part numbers.
+\def\thesection {\arabic{section}}
+\def\thesubsection {\thesection.\arabic{subsection}}
+%\def\thesubsubsection {\thesubsection.\arabic{subsubsection}} % GM 7/30/2002
+%\def\theparagraph {\thesubsubsection.\arabic{paragraph}} % GM 7/30/2002
+\def\thesubparagraph {\theparagraph.\arabic{subparagraph}}
+
+\def\@pnumwidth{1.55em}
+\def\@tocrmarg {2.55em}
+\def\@dotsep{4.5}
+\setcounter{tocdepth}{3}
+
+%\def\tableofcontents{\@latexerr{\tableofcontents: Tables of contents are not
+% allowed in the `acmconf' document style.}\@eha}
+
+\def\tableofcontents{\ClassError{%
+ \string\tableofcontents\space is not allowed in the `acmconf' document % January 2008
+ style}\@eha}
+
+\def\l@part#1#2{\addpenalty{\@secpenalty}
+ \addvspace{2.25em plus 1pt} % space above part line
+ \begingroup
+ \@tempdima 3em % width of box holding part number, used by
+ \parindent \z@ \rightskip \@pnumwidth %% \numberline
+ \parfillskip -\@pnumwidth
+ {\large \bf % set line in \large boldface
+ \leavevmode % TeX command to enter horizontal mode.
+ #1\hfil \hbox to\@pnumwidth{\hss #2}}\par
+ \nobreak % Never break after part entry
+ \endgroup}
+
+\def\l@section#1#2{\addpenalty{\@secpenalty} % good place for page break
+ \addvspace{1.0em plus 1pt} % space above toc entry
+ \@tempdima 1.5em % width of box holding section number
+ \begingroup
+ \parindent \z@ \rightskip \@pnumwidth
+ \parfillskip -\@pnumwidth
+ \bf % Boldface.
+ \leavevmode % TeX command to enter horizontal mode.
+ \advance\leftskip\@tempdima %% added 5 Feb 88 to conform to
+ \hskip -\leftskip %% 25 Jan 88 change to \numberline
+ #1\nobreak\hfil \nobreak\hbox to\@pnumwidth{\hss #2}\par
+ \endgroup}
+
+
+\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}}
+\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}}
+\def\l@paragraph{\@dottedtocline{4}{7.0em}{4.1em}}
+\def\l@subparagraph{\@dottedtocline{5}{10em}{5em}}
+
+%\def\listoffigures{\@latexerr{\listoffigures: Lists of figures are not
+% allowed in the `acmconf' document style.}\@eha}
+
+\def\listoffigures{\ClassError{%
+ \string\listoffigures\space is not allowed in the `acmconf' document % January 2008
+ style}\@eha}
+
+\def\l@figure{\@dottedtocline{1}{1.5em}{2.3em}}
+
+%\def\listoftables{\@latexerr{\listoftables: Lists of tables are not
+% allowed in the `acmconf' document style.}\@eha}
+%\let\l@table\l@figure
+
+\def\listoftables{\ClassError{%
+ \string\listoftables\space is not allowed in the `acmconf' document % January 2008
+ style}\@eha}
+ \let\l@table\l@figure
+
+\def\footnoterule{\kern-3\p@
+ \hrule width .5\columnwidth % (was .4) updated Gerry March 20 2007
+ \kern 2.6\p@} % The \hrule has default height of .4pt % (was .4) updated Gerry March 20 2007
+% ------
+\long\def\@makefntext#1{\noindent
+%\hbox to .5em{\hss$^{\@thefnmark}$}#1} % original
+\hbox to .5em{\hss\textsuperscript{\@thefnmark}}#1} % C. Clifton / GM Oct. 2nd. 2002
+% -------
+
+\long\def\@maketntext#1{\noindent
+#1}
+
+\long\def\@maketitlenotetext#1#2{\noindent
+ \hbox to 1.8em{\hss$^{#1}$}#2}
+
+\setcounter{topnumber}{2}
+\def\topfraction{.7}
+\setcounter{bottomnumber}{1}
+\def\bottomfraction{.3}
+\setcounter{totalnumber}{3}
+\def\textfraction{.2}
+\def\floatpagefraction{.5}
+\setcounter{dbltopnumber}{2}
+\def\dbltopfraction{.7}
+\def\dblfloatpagefraction{.5}
+
+\long\def\@makecaption#1#2{
+ \vskip \baselineskip
+ \setbox\@tempboxa\hbox{\textbf{#1: #2}}
+ \ifdim \wd\@tempboxa >\hsize % IF longer than one line:
+ \textbf{#1: #2}\par % THEN set as ordinary paragraph.
+ \else % ELSE center.
+ \hbox to\hsize{\hfil\box\@tempboxa\hfil}\par
+ \fi}
+
+\@ifundefined{figure}{\newcounter {figure}} % this is for LaTeX2e
+
+\def\fps@figure{tbp}
+\def\ftype@figure{1}
+\def\ext@figure{lof}
+\def\fnum@figure{Figure \thefigure}
+\def\figure{\@float{figure}}
+%\let\endfigure\end@float
+\def\endfigure{\end@float} % Gerry January 2008
+\@namedef{figure*}{\@dblfloat{figure}}
+\@namedef{endfigure*}{\end@dblfloat}
+
+\@ifundefined{table}{\newcounter {table}} % this is for LaTeX2e
+
+\def\fps@table{tbp}
+\def\ftype@table{2}
+\def\ext@table{lot}
+\def\fnum@table{Table \thetable}
+\def\table{\@float{table}}
+%\let\endtable\end@float
+\def\endtable{\end@float} % Gerry January 2008
+\@namedef{table*}{\@dblfloat{table}}
+\@namedef{endtable*}{\end@dblfloat}
+
+\newtoks\titleboxnotes
+\newcount\titleboxnoteflag
+
+\def\maketitle{\par
+ \begingroup
+ \def\thefootnote{\fnsymbol{footnote}}
+ \def\@makefnmark{\hbox
+ to 0pt{$^{\@thefnmark}$\hss}}
+ \twocolumn[\@maketitle]
+\@thanks
+ \endgroup
+ \setcounter{footnote}{0}
+ \let\maketitle\relax
+ \let\@maketitle\relax
+ \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\gdef\@subtitle{}\let\thanks\relax}
+
+%% CHANGES ON NEXT LINES
+\newif\if@ll % to record which version of LaTeX is in use
+
+\expandafter\ifx\csname LaTeXe\endcsname\relax % LaTeX2.09 is used
+\else% LaTeX2e is used, so set ll to true
+\global\@lltrue
+\fi
+
+\if@ll
+ \NeedsTeXFormat{LaTeX2e}
+ \ProvidesClass{acm_proc_article-sp} [2009/04/22 - V3.2SP - based on esub2acm.sty <23 April 96>]
+ \RequirePackage{latexsym}% QUERY: are these two really needed?
+ \let\dooptions\ProcessOptions
+\else
+ \let\dooptions\@options
+\fi
+%% END CHANGES
+
+\def\@height{height}
+\def\@width{width}
+\def\@minus{minus}
+\def\@plus{plus}
+\def\hb@xt@{\hbox to}
+\newif\if@faircopy
+\@faircopyfalse
+\def\ds@faircopy{\@faircopytrue}
+
+\def\ds@preprint{\@faircopyfalse}
+
+\@twosidetrue
+\@mparswitchtrue
+\def\ds@draft{\overfullrule 5\p@}
+%% CHANGE ON NEXT LINE
+\dooptions
+
+\lineskip \p@
+\normallineskip \p@
+\def\baselinestretch{1}
+\def\@ptsize{0} %needed for amssymbols.sty
+
+%% CHANGES ON NEXT LINES
+\if@ll% allow use of old-style font change commands in LaTeX2e
+\@maxdepth\maxdepth
+%
+\DeclareOldFontCommand{\rm}{\ninept\rmfamily}{\mathrm}
+\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
+\DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt}
+\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf}
+\DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit}
+\DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl}
+\DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc}
+\DeclareRobustCommand*{\cal}{\@fontswitch{\relax}{\mathcal}}
+\DeclareRobustCommand*{\mit}{\@fontswitch{\relax}{\mathnormal}}
+\fi
+%
+\if@ll
+ \renewcommand{\rmdefault}{cmr} % was 'ttm'
+% Note! I have also found 'mvr' to work ESPECIALLY well.
+% Gerry - October 1999
+% You may need to change your LV1times.fd file so that sc is
+% mapped to cmcsc - -for smallcaps -- that is if you decide
+% to change {cmr} to {times} above. (Not recommended)
+ \renewcommand{\@ptsize}{}
+ \renewcommand{\normalsize}{%
+ \@setfontsize\normalsize\@ixpt{10.5\p@}%\ninept%
+ \abovedisplayskip 6\p@ \@plus2\p@ \@minus\p@
+ \belowdisplayskip \abovedisplayskip
+ \abovedisplayshortskip 6\p@ \@minus 3\p@
+ \belowdisplayshortskip 6\p@ \@minus 3\p@
+ \let\@listi\@listI
+ }
+\else
+ \def\@normalsize{%changed next to 9 from 10
+ \@setsize\normalsize{9\p@}\ixpt\@ixpt
+ \abovedisplayskip 6\p@ \@plus2\p@ \@minus\p@
+ \belowdisplayskip \abovedisplayskip
+ \abovedisplayshortskip 6\p@ \@minus 3\p@
+ \belowdisplayshortskip 6\p@ \@minus 3\p@
+ \let\@listi\@listI
+ }%
+\fi
+\if@ll
+ \newcommand\scriptsize{\@setfontsize\scriptsize\@viipt{8\p@}}
+ \newcommand\tiny{\@setfontsize\tiny\@vpt{6\p@}}
+ \newcommand\large{\@setfontsize\large\@xiipt{14\p@}}
+ \newcommand\Large{\@setfontsize\Large\@xivpt{18\p@}}
+ \newcommand\LARGE{\@setfontsize\LARGE\@xviipt{20\p@}}
+ \newcommand\huge{\@setfontsize\huge\@xxpt{25\p@}}
+ \newcommand\Huge{\@setfontsize\Huge\@xxvpt{30\p@}}
+\else
+ \def\scriptsize{\@setsize\scriptsize{8\p@}\viipt\@viipt}
+ \def\tiny{\@setsize\tiny{6\p@}\vpt\@vpt}
+ \def\large{\@setsize\large{14\p@}\xiipt\@xiipt}
+ \def\Large{\@setsize\Large{18\p@}\xivpt\@xivpt}
+ \def\LARGE{\@setsize\LARGE{20\p@}\xviipt\@xviipt}
+ \def\huge{\@setsize\huge{25\p@}\xxpt\@xxpt}
+ \def\Huge{\@setsize\Huge{30\p@}\xxvpt\@xxvpt}
+\fi
+\normalsize
+
+% make aubox hsize/number of authors up to 3, less gutter
+% then showbox gutter showbox gutter showbox -- GKMT Aug 99
+\newbox\@acmtitlebox
+\def\@maketitle{\newpage
+ \null
+ \setbox\@acmtitlebox\vbox{%
+\baselineskip 20pt
+\vskip 2em % Vertical space above title.
+ \begin{center}
+ {\ttlfnt \@title\par} % Title set in 18pt Helvetica (Arial) bold size.
+ \vskip 1.5em % Vertical space after title.
+%This should be the subtitle.
+{\subttlfnt \the\subtitletext\par}\vskip 1.25em%\fi
+ {\baselineskip 16pt\aufnt % each author set in 12 pt Arial, in a
+ \lineskip .5em % tabular environment
+ \begin{tabular}[t]{c}\@author
+ \end{tabular}\par}
+ \vskip 1.5em % Vertical space after author.
+ \end{center}}
+ \dimen0=\ht\@acmtitlebox
+ \advance\dimen0 by -12.75pc\relax % Increased space for title box -- KBT
+ \unvbox\@acmtitlebox
+ \ifdim\dimen0<0.0pt\relax\vskip-\dimen0\fi}
+
+
+\newcount\titlenotecount
+\global\titlenotecount=0
+\newtoks\tntoks
+\newtoks\tntokstwo
+\newtoks\tntoksthree
+\newtoks\tntoksfour
+\newtoks\tntoksfive
+
+\def\abstract{
+\ifnum\titlenotecount>0 % was =1
+ \insert\footins{%
+ \reset@font\footnotesize
+ \interlinepenalty\interfootnotelinepenalty
+ \splittopskip\footnotesep
+ \splitmaxdepth \dp\strutbox \floatingpenalty \@MM
+ \hsize\columnwidth \@parboxrestore
+ \protected@edef\@currentlabel{%
+ }%
+ \color@begingroup
+\ifnum\titlenotecount=1
+ \@maketntext{%
+ \raisebox{4pt}{$\ast$}\rule\z@\footnotesep\ignorespaces\the\tntoks\@finalstrut\strutbox}%
+\fi
+\ifnum\titlenotecount=2
+ \@maketntext{%
+ \raisebox{4pt}{$\ast$}\rule\z@\footnotesep\ignorespaces\the\tntoks\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\dagger$}\rule\z@\footnotesep\ignorespaces\the\tntokstwo\@finalstrut\strutbox}%
+\fi
+\ifnum\titlenotecount=3
+ \@maketntext{%
+ \raisebox{4pt}{$\ast$}\rule\z@\footnotesep\ignorespaces\the\tntoks\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\dagger$}\rule\z@\footnotesep\ignorespaces\the\tntokstwo\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\ddagger$}\rule\z@\footnotesep\ignorespaces\the\tntoksthree\@finalstrut\strutbox}%
+\fi
+\ifnum\titlenotecount=4
+ \@maketntext{%
+ \raisebox{4pt}{$\ast$}\rule\z@\footnotesep\ignorespaces\the\tntoks\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\dagger$}\rule\z@\footnotesep\ignorespaces\the\tntokstwo\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\ddagger$}\rule\z@\footnotesep\ignorespaces\the\tntoksthree\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\S$}\rule\z@\footnotesep\ignorespaces\the\tntoksfour\@finalstrut\strutbox}%
+\fi
+\ifnum\titlenotecount=5
+ \@maketntext{%
+ \raisebox{4pt}{$\ast$}\rule\z@\footnotesep\ignorespaces\the\tntoks\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\dagger$}\rule\z@\footnotesep\ignorespaces\the\tntokstwo\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\ddagger$}\rule\z@\footnotesep\ignorespaces\the\tntoksthree\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\S$}\rule\z@\footnotesep\ignorespaces\the\tntoksfour\par\@finalstrut\strutbox}%
+\@maketntext{%
+ \raisebox{4pt}{$\P$}\rule\z@\footnotesep\ignorespaces\the\tntoksfive\@finalstrut\strutbox}%
+\fi
+ \color@endgroup} %g}
+\fi
+\setcounter{footnote}{0}
+\section*{ABSTRACT}\normalsize %\the\parskip \the\baselineskip%\ninept
+}
+
+\def\endabstract{\if@twocolumn\else\endquotation\fi}
+
+\def\keywords{\if@twocolumn
+\section*{Keywords}
+\else \small
+\quotation
+\fi}
+
+% I've pulled the check for 2 cols, since proceedings are _always_
+% two-column 11 Jan 2000 gkmt
+\def\terms{%\if@twocolumn
+\section*{General Terms}
+%\else \small
+%\quotation\the\parskip
+%\fi}
+}
+
+% -- Classification needs to be a bit smart due to optionals - Gerry/Georgia November 2nd. 1999
+\newcount\catcount
+\global\catcount=1
+
+\def\category#1#2#3{%
+\ifnum\catcount=1
+\section*{Categories and Subject Descriptors}
+\advance\catcount by 1\else{\unskip; }\fi
+ \@ifnextchar [{\@category{#1}{#2}{#3}}{\@category{#1}{#2}{#3}[]}%
+}
+
+\def\@category#1#2#3[#4]{%
+ \begingroup
+ \let\and\relax
+ #1 [\textbf{#2}]%
+ \if!#4!%
+ \if!#3!\else : #3\fi
+ \else
+ :\space
+ \if!#3!\else #3\kern\z@---\hskip\z@\fi
+ \textit{#4}%
+ \fi
+ \endgroup
+}
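+%
+% Illustrative (commented-out) usage sketch of the front-matter commands
+% defined above, as they might appear in a paper body; the CCS codes, terms
+% and keywords below are placeholders only:
+%
+%   \category{H.2.4}{Database Management}{Systems}[Query processing]
+%   \category{H.3.4}{Systems and Software}{Distributed systems}
+%   \terms{Design, Performance}
+%   \keywords{distributed systems, OLAP, real-time analytics}
+%
+% Successive \category calls are joined with "; " under a single
+% "Categories and Subject Descriptors" heading, and the bracketed fourth
+% argument is optional, as handled by \@category above.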
+%
+
+%%% This section (written by KBT) handles the 1" box in the lower left
+%%% corner of the left column of the first page by creating a picture,
+%%% and inserting the predefined string at the bottom (with a negative
+%%% displacement to offset the space allocated for a non-existent
+%%% caption).
+%%%
+
+\def\@toappear{} % Default setting blank - commands below change this.
+\long\def\toappear#1{\def\@toappear{\parbox[b]{20pc}{\baselineskip 9pt#1}}}
+\def\toappearbox#1{\def\@toappear{\raisebox{5pt}{\framebox[20pc]{\parbox[b]{19pc}{#1}}}}}
+
+\newtoks\conf
+\newtoks\confinfo
+\def\conferenceinfo#1#2{\global\conf={#1}\global\confinfo{#2}}
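+%
+% Illustrative (commented-out) preamble usage; the boilerplate text below is
+% a placeholder supplied by the author:
+%
+%   \toappear{Permission to make digital or hard copies of all or part of
+%   this work ...}
+%
+% \toappear stores its argument in the 20pc parbox that later fills the
+% 1-inch box described above; \toappearbox does the same but adds a frame.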
+
+
+%\def\marginpar{\@latexerr{The \marginpar command is not allowed in the
+% `acmconf' document style.}\@eha}
+
+\def\marginpar{\ClassError{%
+ \string\marginpar\space is not allowed in the `acmconf' document % January 2008
+ style}\@eha}
+
+\mark{{}{}} % Initializes TeX's marks
+
+\def\today{\ifcase\month\or
+ January\or February\or March\or April\or May\or June\or
+ July\or August\or September\or October\or November\or December\fi
+ \space\number\day, \number\year}
+
+\def\@begintheorem#1#2{%
+ \trivlist
+ \item[%
+ \hskip 10\p@
+ \hskip \labelsep
+ {{\sc #1}\hskip 5\p@\relax#2.}%
+ ]
+ \it
+}
+\def\@opargbegintheorem#1#2#3{%
+ \trivlist
+ \item[%
+ \hskip 10\p@
+ \hskip \labelsep
+ {\sc #1\ #2\ % This mod by Gerry to enumerate corollaries
+ \setbox\@tempboxa\hbox{(#3)} % and bracket the 'corollary title'
+ \ifdim \wd\@tempboxa>\z@ % and retain the correct numbering of e.g. theorems
+ \hskip 5\p@\relax % if they occur 'around' said corollaries.
+ \box\@tempboxa % Gerry - Nov. 1999.
+ \fi.}%
+ ]
+ \it
+}
+\newif\if@qeded
+\global\@qededfalse
+
+% -- original
+%\def\proof{%
+% \vspace{-\parskip} % GM July 2000 (for tighter spacing)
+% \global\@qededfalse
+% \@ifnextchar[{\@xproof}{\@proof}%
+%}
+% -- end of original
+
+% (JSS) Fix for vertical spacing bug - Gerry Murray July 30th. 2002
+\def\proof{%
+\vspace{-\lastskip}\vspace{-\parsep}\penalty-51%
+\global\@qededfalse
+\@ifnextchar[{\@xproof}{\@proof}%
+}
+
+\def\endproof{%
+ \if@qeded\else\qed\fi
+ \endtrivlist
+}
+\def\@proof{%
+ \trivlist
+ \item[%
+ \hskip 10\p@
+ \hskip \labelsep
+ {\sc Proof.}%
+ ]
+ \ignorespaces
+}
+\def\@xproof[#1]{%
+ \trivlist
+ \item[\hskip 10\p@\hskip \labelsep{\sc Proof #1.}]%
+ \ignorespaces
+}
+\def\qed{%
+ \unskip
+ \kern 10\p@
+ \begingroup
+ \unitlength\p@
+ \linethickness{.4\p@}%
+ \framebox(6,6){}%
+ \endgroup
+ \global\@qededtrue
+}
+
+\def\newdef#1#2{%
+ \expandafter\@ifdefinable\csname #1\endcsname
+ {\@definecounter{#1}%
+ \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}%
+ \global\@namedef{#1}{\@defthm{#1}{#2}}%
+ \global\@namedef{end#1}{\@endtheorem}%
+ }%
+}
+\def\@defthm#1#2{%
+ \refstepcounter{#1}%
+ \@ifnextchar[{\@ydefthm{#1}{#2}}{\@xdefthm{#1}{#2}}%
+}
+\def\@xdefthm#1#2{%
+ \@begindef{#2}{\csname the#1\endcsname}%
+ \ignorespaces
+}
+\def\@ydefthm#1#2[#3]{%
+ \trivlist
+ \item[%
+ \hskip 10\p@
+ \hskip \labelsep
+ {\it #2%
+% \savebox\@tempboxa{#3}%
+ \saveb@x\@tempboxa{#3}% % January 2008
+ \ifdim \wd\@tempboxa>\z@
+ \ \box\@tempboxa
+ \fi.%
+ }]%
+ \ignorespaces
+}
+\def\@begindef#1#2{%
+ \trivlist
+ \item[%
+ \hskip 10\p@
+ \hskip \labelsep
+ {\it #1\ \rm #2.}%
+ ]%
+}
+\def\theequation{\arabic{equation}}
+
+\newcounter{part}
+\newcounter{section}
+\newcounter{subsection}[section]
+\newcounter{subsubsection}[subsection]
+\newcounter{paragraph}[subsubsection]
+\def\thepart{\Roman{part}}
+\def\thesection{\arabic{section}}
+\def\thesubsection{\thesection.\arabic{subsection}}
+\def\thesubsubsection{\thesubsection.\arabic{subsubsection}} %removed \subsecfnt 29 July 2002 gkmt
+\def\theparagraph{\thesubsubsection.\arabic{paragraph}} %removed \subsecfnt 29 July 2002 gkmt
+
+\newif\if@uchead
+\@ucheadfalse
+
+%% CHANGES: NEW NOTE
+%% NOTE: OK to use old-style font commands below, since they were
+%% suitably redefined for LaTeX2e
+%% END CHANGES
+\setcounter{secnumdepth}{3}
+\def\part{%
+ \@startsection{part}{9}{\z@}{-10\p@ \@plus -4\p@ \@minus -2\p@}
+ {4\p@}{\normalsize\@ucheadtrue}%
+}
+
+% Rationale for changes made in next four definitions:
+% "Before skip" is made elastic to provide some give in setting columns (vs.
+% parskip, which is non-elastic to keep section headers "anchored" to their
+% subsequent text.
+%
+% "After skip" is minimized -- BUT setting it to 0pt resulted in run-in heads, despite
+% the documentation asserted only after-skip < 0pt would have result.
+%
+% Baselineskip added to style to ensure multi-line section titles, and section heads
+% followed by another section head rather than text, are decently spaced vertically.
+% 12 Jan 2000 gkmt
+\def\section{%
+ \@startsection{section}{1}{\z@}{-10\p@ \@plus -4\p@ \@minus -2\p@}%
+ {0.5pt}{\baselineskip=14pt\secfnt\@ucheadtrue}%
+}
+
+\def\subsection{%
+ \@startsection{subsection}{2}{\z@}{-10\p@ \@plus -4\p@ \@minus -2\p@}
+ {0.5pt}{\baselineskip=14pt\secfnt}%
+}
+\def\subsubsection{%
+ \@startsection{subsubsection}{3}{\z@}{-10\p@ \@plus -4\p@ \@minus -2\p@}%
+ {0.5pt}{\baselineskip=14pt\subsecfnt}%
+}
+
+%\def\paragraph{%
+% \vskip 12pt\@startsection{paragraph}{3}{\z@}{6\p@ \@plus \p@}% original
+% {-5\p@}{\subsecfnt}%
+%}
+% If one wants sections, subsections and subsubsections numbered,
+% but not paragraphs, one usually sets secnumdepth to 3.
+% For that, the "depth" of paragraphs must be given correctly
+% in the definition (``4'' instead of ``3'' as second argument
+% of @startsection):
+\def\paragraph{%
+ \vskip 12pt\@startsection{paragraph}{4}{\z@}{6\p@ \@plus \p@}% % GM and Wolfgang May - 11/30/06
+ {-5\p@}{\subsecfnt}%
+}
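+%
+% Commented-out illustration of the remark above (heading titles are
+% placeholders): with \setcounter{secnumdepth}{3}, as set earlier in this
+% class,
+%   \subsubsection{Storage Format}   % numbered, e.g. 3.2.1
+%   \paragraph{Compression}          % run-in head, left unnumbered
+% because \paragraph is declared here at depth 4, one past secnumdepth.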
+
+\let\@period=.
+\def\@startsection#1#2#3#4#5#6{%
+ \if@noskipsec %gkmt, 11 aug 99
+ \global\let\@period\@empty
+ \leavevmode
+ \global\let\@period.%
+ \fi
+ \par
+ \@tempskipa #4\relax
+ \@afterindenttrue
+ \ifdim \@tempskipa <\z@
+ \@tempskipa -\@tempskipa
+ \@afterindentfalse
+ \fi
+ %\if@nobreak 11 Jan 00 gkmt
+ %\everypar{}
+ %\else
+ \addpenalty\@secpenalty
+ \addvspace\@tempskipa
+ %\fi
+ \parskip=0pt
+ \@ifstar
+ {\@ssect{#3}{#4}{#5}{#6}}
+ {\@dblarg{\@sect{#1}{#2}{#3}{#4}{#5}{#6}}}%
+}
+
+
+\def\@ssect#1#2#3#4#5{%
+ \@tempskipa #3\relax
+ \ifdim \@tempskipa>\z@
+ \begingroup
+ #4{%
+ \@hangfrom{\hskip #1}%
+ \interlinepenalty \@M #5\@@par}%
+ \endgroup
+ \else
+ \def\@svsechd{#4{\hskip #1\relax #5}}%
+ \fi
+ \vskip -10.5pt %gkmt, 7 jan 00 -- had been -14pt, now set to parskip
+ \@xsect{#3}\parskip=10.5pt} % within the starred section, parskip = leading 12 Jan 2000 gkmt
+
+
+\def\@sect#1#2#3#4#5#6[#7]#8{%
+ \ifnum #2>\c@secnumdepth
+ \let\@svsec\@empty
+ \else
+ \refstepcounter{#1}%
+ \edef\@svsec{%
+ \begingroup
+ %\ifnum#2>2 \noexpand\rm \fi % changed to next 29 July 2002 gkmt
+ \ifnum#2>2 \noexpand#6 \fi
+ \csname the#1\endcsname
+ \endgroup
+ \ifnum #2=1\relax .\fi
+ \hskip 1em
+ }%
+ \fi
+ \@tempskipa #5\relax
+ \ifdim \@tempskipa>\z@
+ \begingroup
+ #6\relax
+ \@hangfrom{\hskip #3\relax\@svsec}%
+ \begingroup
+ \interlinepenalty \@M
+ \if@uchead
+ \uppercase{#8}%
+ \else
+ #8%
+ \fi
+ \par
+ \endgroup
+ \endgroup
+ \csname #1mark\endcsname{#7}%
+ \vskip -10.5pt % -14pt gkmt, 11 aug 99 -- changed to -\parskip 11 Jan 2000
+ \addcontentsline{toc}{#1}{%
+ \ifnum #2>\c@secnumdepth \else
+ \protect\numberline{\csname the#1\endcsname}%
+ \fi
+ #7%
+ }%
+ \else
+ \def\@svsechd{%
+ #6%
+ \hskip #3\relax
+ \@svsec
+ \if@uchead
+ \uppercase{#8}%
+ \else
+ #8%
+ \fi
+ \csname #1mark\endcsname{#7}%
+ \addcontentsline{toc}{#1}{%
+ \ifnum #2>\c@secnumdepth \else
+ \protect\numberline{\csname the#1\endcsname}%
+ \fi
+ #7%
+ }%
+ }%
+ \fi
+ \@xsect{#5}\parskip=10.5pt% within the section, parskip = leading 12 Jan 2000 gkmt
+}
+\def\@xsect#1{%
+ \@tempskipa #1\relax
+ \ifdim \@tempskipa>\z@
+ \par
+ \nobreak
+ \vskip \@tempskipa
+ \@afterheading
+ \else
+ \global\@nobreakfalse
+ \global\@noskipsectrue
+ \everypar{%
+ \if@noskipsec
+ \global\@noskipsecfalse
+ \clubpenalty\@M
+ \hskip -\parindent
+ \begingroup
+ \@svsechd
+ \@period
+ \endgroup
+ \unskip
+ \@tempskipa #1\relax
+ \hskip -\@tempskipa
+ \else
+ \clubpenalty \@clubpenalty
+ \everypar{}%
+ \fi
+ }%
+ \fi
+ \ignorespaces
+}
+
+\def\@trivlist{%
+ \@topsepadd\topsep
+ \if@noskipsec
+ \global\let\@period\@empty
+ \leavevmode
+ \global\let\@period.%
+ \fi
+ \ifvmode
+ \advance\@topsepadd\partopsep
+ \else
+ \unskip
+ \par
+ \fi
+ \if@inlabel
+ \@noparitemtrue
+ \@noparlisttrue
+ \else
+ \@noparlistfalse
+ \@topsep\@topsepadd
+ \fi
+ \advance\@topsep \parskip
+ \leftskip\z@skip
+ \rightskip\@rightskip
+ \parfillskip\@flushglue
+ \@setpar{\if@newlist\else{\@@par}\fi}
+ \global\@newlisttrue
+ \@outerparskip\parskip
+}
+
+%%% Actually, 'abbrev' works just fine as the default - Gerry Feb. 2000
+%%% Bibliography style.
+
+\parindent 0pt
+\typeout{Using 'Abbrev' bibliography style}
+\newcommand\bibyear[2]{%
+ \unskip\quad\ignorespaces#1\unskip
+ \if#2..\quad \else \quad#2 \fi
+}
+\newcommand{\bibemph}[1]{{\em#1}}
+\newcommand{\bibemphic}[1]{{\em#1\/}}
+\newcommand{\bibsc}[1]{{\sc#1}}
+\def\@normalcite{%
+ \def\@cite##1##2{[##1\if@tempswa , ##2\fi]}%
+}
+\def\@citeNB{%
+ \def\@cite##1##2{##1\if@tempswa , ##2\fi}%
+}
+\def\@citeRB{%
+ \def\@cite##1##2{##1\if@tempswa , ##2\fi]}%
+}
+\def\start@cite#1#2{%
+ \edef\citeauthoryear##1##2##3{%
+ ###1%
+ \ifnum#2=\z@ \else\ ###2\fi
+ }%
+ \ifnum#1=\thr@@
+ \let\@@cite\@citeyear
+ \else
+ \let\@@cite\@citenormal
+ \fi
+ \@ifstar{\@citeNB\@@cite}{\@normalcite\@@cite}%
+}
+%\def\cite{\start@cite23}
+\DeclareRobustCommand\cite{\start@cite23} % January 2008
+\def\citeNP{\cite*} % No Parentheses e.g. 5
+%\def\citeA{\start@cite10}
+\DeclareRobustCommand\citeA{\start@cite10} % January 2008
+\def\citeANP{\citeA*}
+%\def\shortcite{\start@cite23}
+\DeclareRobustCommand\shortcite{\start@cite23} % January 2008
+\def\shortciteNP{\shortcite*}
+%\def\shortciteA{\start@cite20}
+\DeclareRobustCommand\shortciteA{\start@cite20} % January 2008
+\def\shortciteANP{\shortciteA*}
+%\def\citeyear{\start@cite30}
+\DeclareRobustCommand\citeyear{\start@cite30} % January 2008
+\def\citeyearNP{\citeyear*}
+%\def\citeN{%
+\DeclareRobustCommand\citeN{% % January 2008
+ \@citeRB
+ \def\citeauthoryear##1##2##3{##1\ [##3%
+ \def\reserved@a{##1}%
+ \def\citeauthoryear####1####2####3{%
+ \def\reserved@b{####1}%
+ \ifx\reserved@a\reserved@b
+ ####3%
+ \else
+ \errmessage{Package acmart Error: author mismatch
+ in \string\citeN^^J^^J%
+ See the acmart package documentation for explanation}%
+ \fi
+ }%
+ }%
+ \@ifstar\@citeyear\@citeyear
+}
+%\def\shortciteN{%
+\DeclareRobustCommand\shortciteN{% % January 2008
+ \@citeRB
+ \def\citeauthoryear##1##2##3{##2\ [##3%
+ \def\reserved@a{##2}%
+ \def\citeauthoryear####1####2####3{%
+ \def\reserved@b{####2}%
+ \ifx\reserved@a\reserved@b
+ ####3%
+ \else
+ \errmessage{Package acmart Error: author mismatch
+ in \string\shortciteN^^J^^J%
+ See the acmart package documentation for explanation}%
+ \fi
+ }%
+ }%
+ \@ifstar\@citeyear\@citeyear % changed from "\@ifstart" 12 Jan 2000 gkmt
+}
+
+\def\@citenormal{%
+ \@ifnextchar [{\@tempswatrue\@citex;}%
+% original {\@tempswafalse\@citex,[]}% was ; Gerry 2/24/00
+{\@tempswafalse\@citex[]}% % GERRY FIX FOR BABEL 3/20/2009
+}
+
+\def\@citeyear{%
+ \@ifnextchar [{\@tempswatrue\@citex,}%
+% original {\@tempswafalse\@citex,[]}%
+{\@tempswafalse\@citex[]}% % GERRY FIX FOR BABEL 3/20/2009
+}
+
+\def\@citex#1[#2]#3{%
+ \let\@citea\@empty
+ \@cite{%
+ \@for\@citeb:=#3\do{%
+ \@citea
+% original \def\@citea{#1 }%
+ \def\@citea{#1, }% % GERRY FIX FOR BABEL 3/20/2009 -- SO THAT YOU GET [1, 2] IN THE BODY TEXT
+ \edef\@citeb{\expandafter\@iden\@citeb}%
+ \if@filesw
+ \immediate\write\@auxout{\string\citation{\@citeb}}%
+ \fi
+ \@ifundefined{b@\@citeb}{%
+ {\bf ?}%
+ \@warning{%
+ Citation `\@citeb' on page \thepage\space undefined%
+ }%
+ }%
+ {\csname b@\@citeb\endcsname}%
+ }%
+ }{#2}%
+}
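+%
+% Commented-out usage sketch (keys taken from this paper's bibliography;
+% the rendered numbers are only indicative):
+%   \cite{chang2008bigtable,decandia2007dynamo}  % e.g. [1, 2], comma-separated as fixed above
+%   \cite[p.~5]{hunt2010zookeeper}               % e.g. [3, p. 5], note via the optional argument
+%   \citeNP{liblzf2013}                          % e.g. 4, i.e. no brackets
+% The \citeN, \shortciteN and \citeyear variants defined above are intended
+% for author/year style renderings via the \citeauthoryear redefinitions.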
+%\let\@biblabel\@gobble % Dec. 2008 - Gerry
+% ----
+\def\@biblabelnum#1{[#1]} % Gerry's solution #1 - for Natbib
+\let\@biblabel=\@biblabelnum % Gerry's solution #1 - for Natbib
+\def\newblock{\relax} % Gerry Dec. 2008
+% ---
+\newdimen\bibindent
+\setcounter{enumi}{1}
+\bibindent=0em
+\def\thebibliography#1{%
+\ifnum\addauflag=0\addauthorsection\global\addauflag=1\fi
+ \section[References]{% <=== OPTIONAL ARGUMENT ADDED HERE
+ {References} % was uppercased but this affects pdf bookmarks (SP/GM October 2004)
+ \@mkboth{{\refname}}{{\refname}}%
+ }%
+ \list{[\arabic{enumi}]}{%
+ \settowidth\labelwidth{[#1]}%
+ \leftmargin\labelwidth
+ \advance\leftmargin\labelsep
+ \advance\leftmargin\bibindent
+ \parsep=0pt\itemsep=1pt % GM July 2000
+ \itemindent -\bibindent
+ \listparindent \itemindent
+ \usecounter{enumi}
+ }%
+ \let\newblock\@empty
+ \raggedright % GM July 2000
+ \sloppy
+ \sfcode`\.=1000\relax
+}
+
+
+\gdef\balancecolumns
+{\vfill\eject
+\global\@colht=\textheight
+\global\ht\@cclv=\textheight
+}
+
+\newcount\colcntr
+\global\colcntr=0
+%\newbox\savebox
+\newbox\saveb@x % January 2008
+
+\gdef \@makecol {%
+\global\advance\colcntr by 1
+\ifnum\colcntr>2 \global\colcntr=1\fi
+ \ifvoid\footins
+ \setbox\@outputbox \box\@cclv
+ \else
+ \setbox\@outputbox \vbox{%
+\boxmaxdepth \@maxdepth
+ \@tempdima\dp\@cclv
+ \unvbox \@cclv
+ \vskip-\@tempdima
+ \vskip \skip\footins
+ \color@begingroup
+ \normalcolor
+ \footnoterule
+ \unvbox \footins
+ \color@endgroup
+ }%
+ \fi
+ \xdef\@freelist{\@freelist\@midlist}%
+ \global \let \@midlist \@empty
+ \@combinefloats
+ \ifvbox\@kludgeins
+ \@makespecialcolbox
+ \else
+ \setbox\@outputbox \vbox to\@colht {%
+\@texttop
+ \dimen@ \dp\@outputbox
+ \unvbox \@outputbox
+ \vskip -\dimen@
+ \@textbottom
+ }%
+ \fi
+ \global \maxdepth \@maxdepth
+}
+\def\titlenote{\@ifnextchar[\@xtitlenote{\stepcounter\@mpfn
+\global\advance\titlenotecount by 1
+\ifnum\titlenotecount=1
+ \raisebox{9pt}{$\ast$}
+\fi
+\ifnum\titlenotecount=2
+ \raisebox{9pt}{$\dagger$}
+\fi
+\ifnum\titlenotecount=3
+ \raisebox{9pt}{$\ddagger$}
+\fi
+\ifnum\titlenotecount=4
+\raisebox{9pt}{$\S$}
+\fi
+\ifnum\titlenotecount=5
+\raisebox{9pt}{$\P$}
+\fi
+ \@titlenotetext
+}}
+
+\long\def\@titlenotetext#1{\insert\footins{%
+\ifnum\titlenotecount=1\global\tntoks={#1}\fi
+\ifnum\titlenotecount=2\global\tntokstwo={#1}\fi
+\ifnum\titlenotecount=3\global\tntoksthree={#1}\fi
+\ifnum\titlenotecount=4\global\tntoksfour={#1}\fi
+\ifnum\titlenotecount=5\global\tntoksfive={#1}\fi
+ \reset@font\footnotesize
+ \interlinepenalty\interfootnotelinepenalty
+ \splittopskip\footnotesep
+ \splitmaxdepth \dp\strutbox \floatingpenalty \@MM
+ \hsize\columnwidth \@parboxrestore
+ \protected@edef\@currentlabel{%
+ }%
+ \color@begingroup
+ \color@endgroup}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+\ps@plain
+\baselineskip=11pt
+\let\thepage\relax % For NO page numbers - Gerry Nov. 30th. 1999
+\def\setpagenumber#1{\global\setcounter{page}{#1}}
+%\pagenumbering{arabic} % Arabic page numbers but commented out for NO page numbes - Gerry Nov. 30th. 1999
+\twocolumn % Double column.
+\flushbottom % Even bottom -- alas, does not balance columns at end of document
+\pagestyle{plain}
+
+% Need Copyright Year and Copyright Data to be user definable (in .tex file).
+% Gerry Nov. 30th. 1999
+\newtoks\copyrtyr
+\newtoks\acmcopyr
+\newtoks\boilerplate
+\def\CopyrightYear#1{\global\copyrtyr{#1}}
+\def\crdata#1{\global\acmcopyr{#1}}
+\def\permission#1{\global\boilerplate{#1}}
+%
+
+\toappear{\the\boilerplate\par
+{\confname{\the\conf}} \the\confinfo\par \the\copyrightetc}
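+%
+% Illustrative (commented-out) preamble usage of the token registers above;
+% all strings are placeholders:
+%
+%   \permission{Permission to make digital or hard copies ...}
+%   \conferenceinfo{ACM Conference'13,}{June 2013, San Francisco, CA, USA.}
+%   \CopyrightYear{2013}
+%   \crdata{978-1-XXXX-XXXX-X/13/06}
+%
+% Because \toappear stores its argument unexpanded, \the\boilerplate,
+% \the\conf and \the\confinfo pick up whatever the preamble has set by the
+% time the first page is shipped out; \CopyrightYear and \crdata fill
+% \copyrtyr and \acmcopyr, which the copyright line (defined elsewhere in
+% the class) presumably consumes.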
+% End of ACM_PROC_ARTICLE-SP.CLS -- V3.2SP - 04/22/2009 --
+% Gerry Murray -- Wednesday April 22nd. 2009
+%
+% The following section (i.e. 3 .sty inclusions) was added in May 2007 so as to fix the problems that many
+% authors were having with accents. Sometimes accents would occur, but the letter-character would be of a different
+% font. Conversely, the letter-character font would be correct but, e.g., a 'bar' would appear superimposed on the
+% character instead of, say, an umlaut/diaeresis. Sometimes the letter-character would NOT appear at all.
+% Using [T1]{fontenc} outright was not an option as this caused 99% of the authors to 'produce' a Type-3 (bitmapped)
+% PDF file - useless for production.
+%
+% For proper (font) accenting we NEED these packages to be part of the .cls file i.e. 'ae', 'aecompl' and 'aeguil'
+% ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
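+%
+% Commented-out illustration: with ae/aecompl/aeguill embedded, accented
+% input such as
+%   F\"arber, Bornh\"ovd, Xavier L\'eaut\'e
+% (names appearing in this paper's bibliography and author list) is typeset
+% from the T1-encoded AE virtual fonts, so the accent and the base letter
+% come from matching Type 1 fonts rather than being composed from
+% mismatched ones.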
+%% This is file `ae.sty'
+\def\fileversion{1.3}
+\def\filedate{2001/02/12}
+\NeedsTeXFormat{LaTeX2e}
+%\ProvidesPackage{ae}[\filedate\space\fileversion\space % GM
+% Almost European Computer Modern] % GM - keeping the log file clean(er)
+\newif\if@ae@slides \@ae@slidesfalse
+\DeclareOption{slides}{\@ae@slidestrue}
+\ProcessOptions
+\fontfamily{aer}
+\RequirePackage[T1]{fontenc}
+\if@ae@slides
+ \renewcommand{\sfdefault}{laess}
+ \renewcommand{\rmdefault}{laess} % no roman
+ \renewcommand{\ttdefault}{laett}
+\else
+ \renewcommand{\sfdefault}{aess}
+ \renewcommand{\rmdefault}{aer}
+ \renewcommand{\ttdefault}{aett}
+\fi
+\endinput
+%%
+%% End of file `ae.sty'.
+%
+%
+\def\fileversion{0.9}
+\def\filedate{1998/07/23}
+\NeedsTeXFormat{LaTeX2e}
+%\ProvidesPackage{aecompl}[\filedate\space\fileversion\space % GM
+%T1 Complements for AE fonts (D. Roegel)] % GM -- keeping the log file clean(er)
+
+\def\@ae@compl#1{{\fontencoding{T1}\fontfamily{cmr}\selectfont\symbol{#1}}}
+\def\guillemotleft{\@ae@compl{19}}
+\def\guillemotright{\@ae@compl{20}}
+\def\guilsinglleft{\@ae@compl{14}}
+\def\guilsinglright{\@ae@compl{15}}
+\def\TH{\@ae@compl{222}}
+\def\NG{\@ae@compl{141}}
+\def\ng{\@ae@compl{173}}
+\def\th{\@ae@compl{254}}
+\def\DJ{\@ae@compl{208}}
+\def\dj{\@ae@compl{158}}
+\def\DH{\@ae@compl{208}}
+\def\dh{\@ae@compl{240}}
+\def\@perthousandzero{\@ae@compl{24}}
+\def\textperthousand{\%\@perthousandzero}
+\def\textpertenthousand{\%\@perthousandzero\@perthousandzero}
+\endinput
+%
+%
+%% This is file `aeguill.sty'
+% This file gives french guillemets (and not guillemots!)
+% built with the Polish CMR fonts (default), WNCYR fonts, the LASY fonts
+% or with the EC fonts.
+% This is useful in conjunction with the ae package
+% (this package loads the ae package in case it has not been loaded)
+% and with or without the french(le) package.
+%
+% In order to get the guillemets, it is necessary to either type
+% \guillemotleft and \guillemotright, or to use an 8 bit encoding
+% (such as ISO-Latin1) which selects these two commands,
+% or, if you use the french package (but not the frenchle package),
+% to type << or >>.
+%
+% By default, you get the Polish CMR guillemets; if this package is loaded
+% with the `cm' option, you get the LASY guillemets; with `ec,' you
+% get the EC guillemets, and with `cyr,' you get the cyrillic guillemets.
+%
+% In verbatim mode, you always get the EC/TT guillemets.
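+%
+% Commented-out usage sketch (the quoted word is a placeholder):
+%   \guillemotleft{}Druid\guillemotright{}
+% prints French guillemets (by default the Polish CMR shapes noted above),
+% and << ... >> works as well when the french package is in use.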
+%
+% The default option is interesting in conjunction with PDF,
+% because there is a Type 1 version of the Polish CMR fonts
+% and these guillemets are very close in shape to the EC guillemets.
+% There are no free Type 1 versions of the EC fonts.
+%
+% Support for Polish CMR guillemets was kindly provided by
+% Rolf Niepraschk in version 0.99 (2000/05/22).
+% Bernd Raichle provided extensive simplifications to the code
+% for version 1.00.
+%
+% This package is released under the LPPL.
+%
+% Changes:
+% Date version
+% 2001/04/12 1.01 the frenchle and french package are now distinguished.
+%
+\def\fileversion{1.01}
+\def\filedate{2001/04/12}
+\NeedsTeXFormat{LaTeX2e}
+%\ProvidesPackage{aeguill}[2001/04/12 1.01 % % GM
+%AE fonts with french guillemets (D. Roegel)] % GM - keeping the log file clean(er)
+%\RequirePackage{ae} % GM May 2007 - already embedded here
+
+\newcommand{\@ae@switch}[4]{#4}
+\DeclareOption{ec}{\renewcommand\@ae@switch[4]{#1}}
+\DeclareOption{cm}{\renewcommand\@ae@switch[4]{#2}}
+\DeclareOption{cyr}{\renewcommand\@ae@switch[4]{#3}}
+\DeclareOption{pl}{\renewcommand\@ae@switch[4]{#4}}
+\ExecuteOptions{pl}
+\ProcessOptions
+
+%
+% Load necessary packages
+%
+\@ae@switch{% ec
+ % do nothing
+}{% cm
+ \RequirePackage{latexsym}% GM - May 2007 - already 'mentioned as required' up above
+}{% cyr
+ \RequirePackage[OT2,T1]{fontenc}%
+}{% pl
+ \RequirePackage[OT4,T1]{fontenc}%
+}
+
+% The following command will be compared to \frenchname,
+% as defined in french.sty and frenchle.sty.
+\def\aeguillfrenchdefault{french}%
+
+\let\guill@verbatim@font\verbatim@font
+\def\verbatim@font{\guill@verbatim@font\ecguills{cmtt}%
+ \let\guillemotleft\@oguills\let\guillemotright\@fguills}
+
+\begingroup \catcode`\<=13 \catcode`\>=13
+\def\x{\endgroup
+ \def\ae@lfguill{<<}%
+ \def\ae@rfguill{>>}%
+}\x
+
+\newcommand{\ecguills}[1]{%
+ \def\selectguillfont{\fontencoding{T1}\fontfamily{#1}\selectfont}%
+ \def\@oguills{{\selectguillfont\symbol{19}}}%
+ \def\@fguills{{\selectguillfont\symbol{20}}}%
+ }
+
+\newcommand{\aeguills}{%
+ \ae@guills
+ % We redefine \guillemotleft and \guillemotright
+ % in order to catch them when they are used
+ % with \DeclareInputText (in latin1.def for instance)
+ % We use \auxWARNINGi as a safe indicator that french.sty is used.
+ \gdef\guillemotleft{\ifx\auxWARNINGi\undefined
+ \@oguills % neither french.sty nor frenchle.sty
+ \else
+ \ifx\aeguillfrenchdefault\frenchname
+ \ae@lfguill % french.sty
+ \else
+ \@oguills % frenchle.sty
+ \fi
+ \fi}%
+ \gdef\guillemotright{\ifx\auxWARNINGi\undefined
+ \@fguills % neither french.sty nor frenchle.sty
+ \else
+ \ifx\aeguillfrenchdefault\frenchname
+ \ae@rfguill % french.sty
+ \else
+ \@fguills % frenchle.sty
+ \fi
+ \fi}%
+ }
+
+%
+% Depending on the class option
+% define the internal command \ae@guills
+\@ae@switch{% ec
+ \newcommand{\ae@guills}{%
+ \ecguills{cmr}}%
+}{% cm
+ \newcommand{\ae@guills}{%
+ \def\selectguillfont{\fontencoding{U}\fontfamily{lasy}%
+ \fontseries{m}\fontshape{n}\selectfont}%
+ \def\@oguills{\leavevmode\nobreak
+ \hbox{\selectguillfont (\kern-.20em(\kern.20em}\nobreak}%
+ \def\@fguills{\leavevmode\nobreak
+ \hbox{\selectguillfont \kern.20em)\kern-.2em)}%
+ \ifdim\fontdimen\@ne\font>\z@\/\fi}}%
+}{% cyr
+ \newcommand{\ae@guills}{%
+ \def\selectguillfont{\fontencoding{OT2}\fontfamily{wncyr}\selectfont}%
+ \def\@oguills{{\selectguillfont\symbol{60}}}%
+ \def\@fguills{{\selectguillfont\symbol{62}}}}
+}{% pl
+ \newcommand{\ae@guills}{%
+ \def\selectguillfont{\fontencoding{OT4}\fontfamily{cmr}\selectfont}%
+ \def\@oguills{{\selectguillfont\symbol{174}}}%
+ \def\@fguills{{\selectguillfont\symbol{175}}}}
+}
+
+
+\AtBeginDocument{%
+ \ifx\GOfrench\undefined
+ \aeguills
+ \else
+ \let\aeguill@GOfrench\GOfrench
+ \gdef\GOfrench{\aeguill@GOfrench \aeguills}%
+ \fi
+ }
+
+\endinput
+%
+
+
diff --git a/publications/whitepaper/druid.bib b/publications/whitepaper/druid.bib
index 0d1418305bb..797207adeb6 100644
--- a/publications/whitepaper/druid.bib
+++ b/publications/whitepaper/druid.bib
@@ -1,348 +1,368 @@
-@article{cattell2011scalable,
- title={Scalable SQL and NoSQL data stores},
- author={Cattell, Rick},
- journal={ACM SIGMOD Record},
- volume={39},
- number={4},
- pages={12--27},
- year={2011},
- publisher={ACM}
-}
+ @article{cattell2011scalable,
+ title={Scalable SQL and NoSQL data stores},
+ author={Cattell, Rick},
+ journal={ACM SIGMOD Record},
+ volume={39},
+ number={4},
+ pages={12--27},
+ year={2011},
+ publisher={ACM}
+ }
-@article{chang2008bigtable,
- title={Bigtable: A distributed storage system for structured data},
- author={Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E},
- journal={ACM Transactions on Computer Systems (TOCS)},
- volume={26},
- number={2},
- pages={4},
- year={2008},
- publisher={ACM}
-}
+ @article{chang2008bigtable,
+ title={Bigtable: A distributed storage system for structured data},
+ author={Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E},
+ journal={ACM Transactions on Computer Systems (TOCS)},
+ volume={26},
+ number={2},
+ pages={4},
+ year={2008},
+ publisher={ACM}
+ }
-@inproceedings{decandia2007dynamo,
- title={Dynamo: amazon's highly available key-value store},
- author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
- booktitle={ACM SIGOPS Operating Systems Review},
- volume={41},
- number={6},
- pages={205--220},
- year={2007},
- organization={ACM}
-}
+ @inproceedings{decandia2007dynamo,
+ title={Dynamo: amazon's highly available key-value store},
+ author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
+ booktitle={ACM SIGOPS Operating Systems Review},
+ volume={41},
+ number={6},
+ pages={205--220},
+ year={2007},
+ organization={ACM}
+ }
-@inproceedings{bear2012vertica,
- title={The vertica database: SQL RDBMS for managing big data},
- author={Bear, Chuck and Lamb, Andrew and Tran, Nga},
- booktitle={Proceedings of the 2012 workshop on Management of big data systems},
- pages={37--38},
- year={2012},
- organization={ACM}
-}
+ @inproceedings{bear2012vertica,
+ title={The vertica database: SQL RDBMS for managing big data},
+ author={Bear, Chuck and Lamb, Andrew and Tran, Nga},
+ booktitle={Proceedings of the 2012 workshop on Management of big data systems},
+ pages={37--38},
+ year={2012},
+ organization={ACM}
+ }
-@article{lakshman2010cassandra,
- title={Cassandra—A decentralized structured storage system},
- author={Lakshman, Avinash and Malik, Prashant},
- journal={Operating systems review},
- volume={44},
- number={2},
- pages={35},
- year={2010}
-}
+ @article{lakshman2010cassandra,
+ title={Cassandra—A decentralized structured storage system},
+ author={Lakshman, Avinash and Malik, Prashant},
+ journal={Operating systems review},
+ volume={44},
+ number={2},
+ pages={35},
+ year={2010}
+ }
-@article{melnik2010dremel,
- title={Dremel: interactive analysis of web-scale datasets},
- author={Melnik, Sergey and Gubarev, Andrey and Long, Jing Jing and Romer, Geoffrey and Shivakumar, Shiva and Tolton, Matt and Vassilakis, Theo},
- journal={Proceedings of the VLDB Endowment},
- volume={3},
- number={1-2},
- pages={330--339},
- year={2010},
- publisher={VLDB Endowment}
-}
+ @article{melnik2010dremel,
+ title={Dremel: interactive analysis of web-scale datasets},
+ author={Melnik, Sergey and Gubarev, Andrey and Long, Jing Jing and Romer, Geoffrey and Shivakumar, Shiva and Tolton, Matt and Vassilakis, Theo},
+ journal={Proceedings of the VLDB Endowment},
+ volume={3},
+ number={1-2},
+ pages={330--339},
+ year={2010},
+ publisher={VLDB Endowment}
+ }
-@article{hall2012processing,
- title={Processing a trillion cells per mouse click},
- author={Hall, Alexander and Bachmann, Olaf and B{\"u}ssow, Robert and G{\u{a}}nceanu, Silviu and Nunkesser, Marc},
- journal={Proceedings of the VLDB Endowment},
- volume={5},
- number={11},
- pages={1436--1446},
- year={2012},
- publisher={VLDB Endowment}
-}
+ @article{hall2012processing,
+ title={Processing a trillion cells per mouse click},
+ author={Hall, Alexander and Bachmann, Olaf and B{\"u}ssow, Robert and G{\u{a}}nceanu, Silviu and Nunkesser, Marc},
+ journal={Proceedings of the VLDB Endowment},
+ volume={5},
+ number={11},
+ pages={1436--1446},
+ year={2012},
+ publisher={VLDB Endowment}
+ }
-@inproceedings{shvachko2010hadoop,
- title={The hadoop distributed file system},
- author={Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
- booktitle={Mass Storage Systems and Technologies (MSST), 2010 IEEE 26th Symposium on},
- pages={1--10},
- year={2010},
- organization={IEEE}
-}
+ @inproceedings{shvachko2010hadoop,
+ title={The hadoop distributed file system},
+ author={Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
+ booktitle={Mass Storage Systems and Technologies (MSST), 2010 IEEE 26th Symposium on},
+ pages={1--10},
+ year={2010},
+ organization={IEEE}
+ }
-@article{colantonio2010concise,
- title={Concise: Compressed ‘n’Composable Integer Set},
- author={Colantonio, Alessandro and Di Pietro, Roberto},
- journal={Information Processing Letters},
- volume={110},
- number={16},
- pages={644--650},
- year={2010},
- publisher={Elsevier}
-}
+ @article{colantonio2010concise,
+ title={Concise: Compressed ‘n’ Composable Integer Set},
+ author={Colantonio, Alessandro and Di Pietro, Roberto},
+ journal={Information Processing Letters},
+ volume={110},
+ number={16},
+ pages={644--650},
+ year={2010},
+ publisher={Elsevier}
+ }
-@article{lerner2010redis,
- title={At the Forge: Redis},
- author={Lerner, Richard},
- journal={Linux Journal},
- volume={2010},
- number={197},
- pages={3},
- year={2010}
-}
+ @inproceedings{stonebraker2005c,
+ title={C-store: a column-oriented DBMS},
+ author={Stonebraker, Mike and Abadi, Daniel J and Batkin, Adam and Chen, Xuedong and Cherniack, Mitch and Ferreira, Miguel and Lau, Edmond and Lin, Amerson and Madden, Sam and O'Neil, Elizabeth and others},
+ booktitle={Proceedings of the 31st international conference on Very large data bases},
+ pages={553--564},
+ year={2005},
+ organization={VLDB Endowment}
+ }
-@inproceedings{stonebraker2005c,
- title={C-store: a column-oriented DBMS},
- author={Stonebraker, Mike and Abadi, Daniel J and Batkin, Adam and Chen, Xuedong and Cherniack, Mitch and Ferreira, Miguel and Lau, Edmond and Lin, Amerson and Madden, Sam and O'Neil, Elizabeth and others},
- booktitle={Proceedings of the 31st international conference on Very large data bases},
- pages={553--564},
- year={2005},
- organization={VLDB Endowment}
-}
+ @inproceedings{engle2012shark,
+ title={Shark: fast data analysis using coarse-grained distributed memory},
+ author={Engle, Cliff and Lupher, Antonio and Xin, Reynold and Zaharia, Matei and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
+ booktitle={Proceedings of the 2012 international conference on Management of Data},
+ pages={689--692},
+ year={2012},
+ organization={ACM}
+ }
-@inproceedings{engle2012shark,
- title={Shark: fast data analysis using coarse-grained distributed memory},
- author={Engle, Cliff and Lupher, Antonio and Xin, Reynold and Zaharia, Matei and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
- booktitle={Proceedings of the 2012 international conference on Management of Data},
- pages={689--692},
- year={2012},
- organization={ACM}
-}
+ @inproceedings{zaharia2012discretized,
+ title={Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters},
+ author={Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
+ booktitle={Proceedings of the 4th USENIX conference on Hot Topics in Cloud Computing},
+ pages={10--10},
+ year={2012},
+ organization={USENIX Association}
+ }
-@inproceedings{zaharia2012discretized,
- title={Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters},
- author={Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
- booktitle={Proceedings of the 4th USENIX conference on Hot Topics in Cloud Computing},
- pages={10--10},
- year={2012},
- organization={USENIX Association}
-}
+ @misc{marz2013storm,
+ author = {Marz, Nathan},
+ title = {Storm: Distributed and Fault-Tolerant Realtime Computation},
+ month = {February},
+ year = {2013},
+ howpublished = "\url{http://storm-project.net/}"
+ }
-@misc{marz2013storm,
-author = {Marz, Nathan},
-title = {Storm: Distributed and Fault-Tolerant Realtime Computation},
-month = {February},
-year = {2013},
-howpublished = "\url{http://storm-project.net/}"
-}
+ @misc{tschetter2011druid,
+ author = {Eric Tschetter},
+ title = {Introducing Druid: Real-Time Analytics at a Billion Rows Per Second},
+ month = {April},
+ year = {2011},
+ howpublished = "\url{http://metamarkets.com/2011/druid-part-i-real-time-analytics-at-a-billion-rows-per-second/}"
+ }
-@article{farber2012sap,
- title={SAP HANA database: data management for modern business applications},
- author={F{\"a}rber, Franz and Cha, Sang Kyun and Primsch, J{\"u}rgen and Bornh{\"o}vd, Christof and Sigg, Stefan and Lehner, Wolfgang},
- journal={ACM Sigmod Record},
- volume={40},
- number={4},
- pages={45--51},
- year={2012},
- publisher={ACM}
-}
+ @article{farber2012sap,
+ title={SAP HANA database: data management for modern business applications},
+ author={F{\"a}rber, Franz and Cha, Sang Kyun and Primsch, J{\"u}rgen and Bornh{\"o}vd, Christof and Sigg, Stefan and Lehner, Wolfgang},
+ journal={ACM Sigmod Record},
+ volume={40},
+ number={4},
+ pages={45--51},
+ year={2012},
+ publisher={ACM}
+ }
-@misc{voltdb2010voltdb,
- title={VoltDB Technical Overview},
- author={VoltDB, LLC},
- year={2010},
-howpublished = "\url{https://voltdb.com/}"
-}
+ @misc{voltdb2010voltdb,
+ title={VoltDB Technical Overview},
+ author={VoltDB, LLC},
+ year={2010},
+ howpublished = "\url{https://voltdb.com/}"
+ }
-@inproceedings{macnicol2004sybase,
- title={Sybase IQ multiplex-designed for analytics},
- author={MacNicol, Roger and French, Blaine},
- booktitle={Proceedings of the Thirtieth international conference on Very large data bases-Volume 30},
- pages={1227--1230},
- year={2004},
- organization={VLDB Endowment}
-}
+ @inproceedings{macnicol2004sybase,
+ title={Sybase IQ multiplex-designed for analytics},
+ author={MacNicol, Roger and French, Blaine},
+ booktitle={Proceedings of the Thirtieth international conference on Very large data bases-Volume 30},
+ pages={1227--1230},
+ year={2004},
+ organization={VLDB Endowment}
+ }
-@inproceedings{singh2011introduction,
- title={Introduction to the IBM Netezza warehouse appliance},
- author={Singh, Malcolm and Leonhardi, Ben},
- booktitle={Proceedings of the 2011 Conference of the Center for Advanced Studies on Collaborative Research},
- pages={385--386},
- year={2011},
- organization={IBM Corp.}
-}
+ @inproceedings{singh2011introduction,
+ title={Introduction to the IBM Netezza warehouse appliance},
+ author={Singh, Malcolm and Leonhardi, Ben},
+ booktitle={Proceedings of the 2011 Conference of the Center for Advanced Studies on Collaborative Research},
+ pages={385--386},
+ year={2011},
+ organization={IBM Corp.}
+ }
-@inproceedings{miner2012unified,
- title={Unified analytics platform for big data},
- author={Miner, Donald},
- booktitle={Proceedings of the WICSA/ECSA 2012 Companion Volume},
- pages={176--176},
- year={2012},
- organization={ACM}
-}
+ @inproceedings{miner2012unified,
+ title={Unified analytics platform for big data},
+ author={Miner, Donald},
+ booktitle={Proceedings of the WICSA/ECSA 2012 Companion Volume},
+ pages={176--176},
+ year={2012},
+ organization={ACM}
+ }
-@inproceedings{fink2012distributed,
- title={Distributed computation on dynamo-style distributed storage: riak pipe},
- author={Fink, Bryan},
- booktitle={Proceedings of the eleventh ACM SIGPLAN workshop on Erlang workshop},
- pages={43--50},
- year={2012},
- organization={ACM}
-}
+ @inproceedings{fink2012distributed,
+ title={Distributed computation on dynamo-style distributed storage: riak pipe},
+ author={Fink, Bryan},
+ booktitle={Proceedings of the eleventh ACM SIGPLAN workshop on Erlang workshop},
+ pages={43--50},
+ year={2012},
+ organization={ACM}
+ }
-@misc{paraccel2013,
-key = {ParAccel Analytic Database},
-title = {ParAccel Analytic Database},
-month = {March},
-year = {2013},
-howpublished = "\url{http://www.paraccel.com/resources/Datasheets/ParAccel-Core-Analytic-Database.pdf}"
-}
+ @misc{paraccel2013,
+ key = {ParAccel Analytic Database},
+ title = {ParAccel Analytic Database},
+ month = {March},
+ year = {2013},
+ howpublished = "\url{http://www.paraccel.com/resources/Datasheets/ParAccel-Core-Analytic-Database.pdf}"
+ }
-@article{barroso2009datacenter,
- title={The datacenter as a computer: An introduction to the design of warehouse-scale machines},
- author={Barroso, Luiz Andr{\'e} and H{\"o}lzle, Urs},
- journal={Synthesis Lectures on Computer Architecture},
- volume={4},
- number={1},
- pages={1--108},
- year={2009},
- publisher={Morgan \& Claypool Publishers}
-}
+ @misc{cloudera2013,
+ key = {Cloudera Impala},
+ title = {Cloudera Impala},
+ month = {March},
+ year = {2013},
+ howpublished = "\url{http://blog.cloudera.com/blog}"
+ }
-@article{chaudhuri1997overview,
- title={An overview of data warehousing and OLAP technology},
- author={Chaudhuri, Surajit and Dayal, Umeshwar},
- journal={ACM Sigmod record},
- volume={26},
- number={1},
- pages={65--74},
- year={1997}
-}
+ @inproceedings{hunt2010zookeeper,
+ title={ZooKeeper: Wait-free coordination for Internet-scale systems},
+ author={Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio P and Reed, Benjamin},
+ booktitle={USENIX ATC},
+ volume={10},
+ year={2010}
+ }
-@article{dewitt1992parallel,
- title={Parallel database systems: the future of high performance database systems},
- author={DeWitt, David and Gray, Jim},
- journal={Communications of the ACM},
- volume={35},
- number={6},
- pages={85--98},
- year={1992},
- publisher={ACM}
-}
+ @inproceedings{kreps2011kafka,
+ title={Kafka: A distributed messaging system for log processing},
+ author={Kreps, Jay and Narkhede, Neha and Rao, Jun},
+ booktitle={Proceedings of 6th International Workshop on Networking Meets Databases (NetDB), Athens, Greece},
+ year={2011}
+ }
-@misc{cloudera2013,
-key = {Cloudera Impala},
-title = {Cloudera Impala},
-month = {March},
-year = {2013},
-url = {},
-howpublished = "\url{http://blog.cloudera.com/blog}"
-}
+ @misc{liblzf2013,
+ title = {LibLZF},
+ key = {LibLZF},
+ month = {March},
+ year = {2013},
+ howpublished = "\url{http://freecode.com/projects/liblzf}"
+ }
-@inproceedings{hunt2010zookeeper,
- title={ZooKeeper: Wait-free coordination for Internet-scale systems},
- author={Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio P and Reed, Benjamin},
- booktitle={USENIX ATC},
- volume={10},
- year={2010}
-}
+ @inproceedings{tomasic1993performance,
+ title={Performance of inverted indices in shared-nothing distributed text document information retrieval systems},
+ author={Tomasic, Anthony and Garcia-Molina, Hector},
+ booktitle={Parallel and Distributed Information Systems, 1993., Proceedings of the Second International Conference on},
+ pages={8--17},
+ year={1993},
+ organization={IEEE}
+ }
-@inproceedings{kreps2011kafka,
- title={Kafka: A distributed messaging system for log processing},
- author={Kreps, Jay and Narkhede, Neha and Rao, Jun},
- booktitle={Proceedings of 6th International Workshop on Networking Meets Databases (NetDB), Athens, Greece},
- year={2011}
-}
+ @inproceedings{antoshenkov1995byte,
+ title={Byte-aligned bitmap compression},
+ author={Antoshenkov, Gennady},
+ booktitle={Data Compression Conference, 1995. DCC'95. Proceedings},
+ pages={476},
+ year={1995},
+ organization={IEEE}
+ }
-@misc{liblzf2013,
-title = {LibLZF},
-key = {LibLZF},
-month = {March},
-year = {2013},
-howpublished = "\url{http://freecode.com/projects/liblzf}"
-}
+ @inproceedings{van2011memory,
+ title={A memory efficient reachability data structure through bit vector compression},
+ author={van Schaik, Sebastiaan J and de Moor, Oege},
+ booktitle={Proceedings of the 2011 international conference on Management of data},
+ pages={913--924},
+ year={2011},
+ organization={ACM}
+ }
-@inproceedings{tomasic1993performance,
- title={Performance of inverted indices in shared-nothing distributed text document information retrieval systems},
- author={Tomasic, Anthony and Garcia-Molina, Hector},
- booktitle={Parallel and Distributed Information Systems, 1993., Proceedings of the Second International Conference on},
- pages={8--17},
- year={1993},
- organization={IEEE}
-}
+ @inproceedings{o1993lru,
+ title={The LRU-K page replacement algorithm for database disk buffering},
+ author={O'neil, Elizabeth J and O'neil, Patrick E and Weikum, Gerhard},
+ booktitle={ACM SIGMOD Record},
+ volume={22},
+ number={2},
+ pages={297--306},
+ year={1993},
+ organization={ACM}
+ }
-@inproceedings{antoshenkov1995byte,
- title={Byte-aligned bitmap compression},
- author={Antoshenkov, Gennady},
- booktitle={Data Compression Conference, 1995. DCC'95. Proceedings},
- pages={476},
- year={1995},
- organization={IEEE}
-}
+ @article{kim2001lrfu,
+ title={LRFU: A spectrum of policies that subsumes the least recently used and least frequently used policies},
+ author={Kim, Chong Sang},
+ journal={IEEE Transactions on Computers},
+ volume={50},
+ number={12},
+ year={2001}
+ }
-@inproceedings{van2011memory,
- title={A memory efficient reachability data structure through bit vector compression},
- author={van Schaik, Sebastiaan J and de Moor, Oege},
- booktitle={Proceedings of the 2011 international conference on Management of data},
- pages={913--924},
- year={2011},
- organization={ACM}
-}
+ @article{wu2006optimizing,
+ title={Optimizing bitmap indices with efficient compression},
+ author={Wu, Kesheng and Otoo, Ekow J and Shoshani, Arie},
+ journal={ACM Transactions on Database Systems (TODS)},
+ volume={31},
+ number={1},
+ pages={1--38},
+ year={2006},
+ publisher={ACM}
+ }
-@inproceedings{o1993lru,
- title={The LRU-K page replacement algorithm for database disk buffering},
- author={O'neil, Elizabeth J and O'neil, Patrick E and Weikum, Gerhard},
- booktitle={ACM SIGMOD Record},
- volume={22},
- number={2},
- pages={297--306},
- year={1993},
- organization={ACM}
-}
+ @misc{twitter2013,
+ key = {Twitter Public Streams},
+ title = {Twitter Public Streams},
+ month = {March},
+ year = {2013},
+ howpublished = "\url{https://dev.twitter.com/docs/streaming-apis/streams/public}"
+ }
-@article{kim2001lrfu,
- title={LRFU: A spectrum of policies that subsumes the least recently used and least frequently used policies},
- author={Kim, Chong Sang},
- journal={IEEE Transactions on Computers},
- volume={50},
- number={12},
- year={2001}
-}
+ @article{fitzpatrick2004distributed,
+ title={Distributed caching with memcached},
+ author={Fitzpatrick, Brad},
+ journal={Linux journal},
+ number={124},
+ pages={72--74},
+ year={2004}
+ }
-@article{wu2006optimizing,
- title={Optimizing bitmap indices with efficient compression},
- author={Wu, Kesheng and Otoo, Ekow J and Shoshani, Arie},
- journal={ACM Transactions on Database Systems (TODS)},
- volume={31},
- number={1},
- pages={1--38},
- year={2006},
- publisher={ACM}
-}
+ @inproceedings{amdahl1967validity,
+ title={Validity of the single processor approach to achieving large scale computing capabilities},
+ author={Amdahl, Gene M},
+ booktitle={Proceedings of the April 18-20, 1967, spring joint computer conference},
+ pages={483--485},
+ year={1967},
+ organization={ACM}
+ }
+ @book{sarawagi1998discovery,
+ title={Discovery-driven exploration of OLAP data cubes},
+ author={Sarawagi, Sunita and Agrawal, Rakesh and Megiddo, Nimrod},
+ year={1998},
+ publisher={Springer}
+ }
+ @inproceedings{abadi2008column,
+ title={Column-Stores vs. Row-Stores: How different are they really?},
+ author={Abadi, Daniel J and Madden, Samuel R and Hachem, Nabil},
+ booktitle={Proceedings of the 2008 ACM SIGMOD international conference on Management of data},
+ pages={967--980},
+ year={2008},
+ organization={ACM}
+ }
+ @article{hu2011stream,
+ title={Stream Database Survey},
+ author={Hu, Bo},
+ year={2011}
+ }
-@misc{twitter2013,
-key = {Twitter Public Streams},
-title = {Twitter Public Streams},
-month = {March},
-year = {2013},
-howpublished = "\url{https://dev.twitter.com/docs/streaming-apis/streams/public}"
-}
+ @article{dean2008mapreduce,
+ title={MapReduce: simplified data processing on large clusters},
+ author={Dean, Jeffrey and Ghemawat, Sanjay},
+ journal={Communications of the ACM},
+ volume={51},
+ number={1},
+ pages={107--113},
+ year={2008},
+ publisher={ACM}
+ }
-@article{fitzpatrick2004distributed,
- title={Distributed caching with memcached},
- author={Fitzpatrick, Brad},
- journal={Linux journal},
- number={124},
- pages={72--74},
- year={2004}
-}
+ @misc{linkedin2013senseidb,
+ author = {LinkedIn},
+ title = {SenseiDB},
+ month = {July},
+ year = {2013},
+ howpublished = "\url{http://www.senseidb.com/}"
+ }
-@inproceedings{amdahl1967validity,
- title={Validity of the single processor approach to achieving large scale computing capabilities},
- author={Amdahl, Gene M},
- booktitle={Proceedings of the April 18-20, 1967, spring joint computer conference},
- pages={483--485},
- year={1967},
- organization={ACM}
-}
+ @misc{apache2013solr,
+ author = {Apache},
+ title = {Apache Solr},
+ month = {February},
+ year = {2013},
+ howpublished = "\url{http://lucene.apache.org/solr/}"
+ }
+
+ @misc{banon2013elasticsearch,
+ author = {Banon, Shay},
+ title = {ElasticSearch},
+ month = {July},
+ year = {2013},
+ howpublished = "\url{http://www.elasticseach.com/}"
+ }
diff --git a/publications/whitepaper/druid.pdf b/publications/whitepaper/druid.pdf
new file mode 100644
index 00000000000..f4d1d8b1720
Binary files /dev/null and b/publications/whitepaper/druid.pdf differ
diff --git a/publications/whitepaper/druid.tex b/publications/whitepaper/druid.tex
index 55083998d62..3ca2e6c4375 100644
--- a/publications/whitepaper/druid.tex
+++ b/publications/whitepaper/druid.tex
@@ -1,13 +1,6 @@
-% THIS IS AN EXAMPLE DOCUMENT FOR VLDB 2012
-% based on ACM SIGPROC-SP.TEX VERSION 2.7
-% Modified by Gerald Weber
-% Removed the requirement to include *bbl file in here. (AhmetSacan, Sep2012)
-% Fixed the equation on page 3 to prevent line overflow. (AhmetSacan, Sep2012)
-
-%\documentclass[draft]{vldb}
-\documentclass{vldb}
+\documentclass{acm_proc_article-sp}
\usepackage{graphicx}
-\usepackage{balance} % for \balance command ON LAST PAGE (only there!)
+\usepackage{balance}
\usepackage{fontspec}
\setmainfont[Ligatures={TeX}]{Times}
\usepackage{hyperref}
@@ -21,509 +14,531 @@
\title{Druid: A Real-time Analytical Data Store}
-% possible, but not really needed or used for PVLDB:
-%\subtitle{[Extended Abstract]
-%\titlenote{A full version of this paper is available as\textit{Author's Guide to Preparing ACM SIG Proceedings Using \LaTeX$2_\epsilon$\ and BibTeX} at \texttt{www.acm.org/eaddress.htm}}}
-
% ****************** AUTHORS **************************************
-% You need the command \numberofauthors to handle the 'placement
-% and alignment' of the authors beneath the title.
-%
-% For aesthetic reasons, we recommend 'three authors at a time'
-% i.e. three 'name/affiliation blocks' be placed beneath the title.
-%
-% NOTE: You are NOT restricted in how many 'rows' of
-% "name/affiliations" may appear. We just ask that you restrict
-% the number of 'columns' to three.
-%
-% Because of the available 'opening page real-estate'
-% we ask you to refrain from putting more than six authors
-% (two rows with three columns) beneath the article title.
-% More than six makes the first-page appear very cluttered indeed.
-%
-% Use the \alignauthor commands to handle the names
-% and affiliations for an 'aesthetic maximum' of six authors.
-% Add names, affiliations, addresses for
-% the seventh etc. author(s) as the argument for the
-% \additionalauthors command.
-% These 'additional authors' will be output/set for you
-% without further effort on your part as the last section in
-% the body of your article BEFORE References or any Appendices.
-
-\numberofauthors{7} % in this sample file, there are a *total*
-% of EIGHT authors. SIX appear on the 'first-page' (for formatting
-% reasons) and the remaining two appear in the \additionalauthors section.
-
+\numberofauthors{6}
\author{
-% You can go ahead and credit any number of authors here,
-% e.g. one 'row of three' or two rows (consisting of one row of three
-% and a second row of one, two or three).
-%
-% The command \alignauthor (no curly braces needed) should
-% precede each author name, affiliation/snail-mail address and
-% e-mail address. Additionally, tag each line of
-% affiliation/address with \affaddr, and tag the
-% e-mail address with \email.
-%
-% 1st. author
-\alignauthor Fangjin Yang\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{fangjin@metamarkets.com}
-\alignauthor Eric Tschetter\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{eric@metamarkets.com}
-\alignauthor Gian Merlino\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{gian@metamarkets.com}
-\and
-\alignauthor Nelson Ray\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{nelson@metamarkets.com}
-\alignauthor Xavier Léauté\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{xavier@metamarkets.com}
-\alignauthor Deep Ganguli\\
-\affaddr{Metamarkets Group, Inc.}\\
-\affaddr{625 2nd Street, Suite 230}\\
-\affaddr{San Francisco, CA, USA}\\
-\email{deep@metamarkets.com}
+\alignauthor Fangjin Yang, Eric Tschetter, Gian Merlino, Nelson Ray, Xavier Léauté, Deep Ganguli, Himadri Singh\\
+\email{\{fangjin, cheddar, gian, nelson, xavier, deep, himadri\}@metamarkets.com}
}
-% There's nothing stopping you putting the seventh, eighth, etc.
-% author on the opening page (as the 'third row') but we ask,
-% for aesthetic reasons that you place these 'additional authors'
-% in the \additional authors block, viz.
-\additionalauthors{Michael Driscoll (Metamarkets, \texttt{mike@metamarkets.com})}
\date{21 March 2013}
-% Just remember to make sure that the TOTAL number of authors
-% is the number that will appear on the first page PLUS the
-% number that will appear in the \additionalauthors section.
\maketitle
-\begin{abstract}
-Druid is an open source\footnote{\href{https://github.com/metamx/druid}{https://github.com/metamx/druid}}, real-time analytical data store that supports
-fast ad-hoc queries on large-scale data sets. The system combines a
-column-oriented data layout, a shared-nothing architecture, and an advanced
-indexing structure to allow for the arbitrary exploration of billion-row
-tables with sub-second latencies. Druid scales horizontally and is the
-core engine of the Metamarkets data analytics platform. In this paper, we detail Druid's architecture, and describe how it supports real-time data ingestion and interactive analytical queries.
+\begin{abstract}
+Druid is an open
+source\footnote{\href{https://github.com/metamx/druid}{https://github.com/metamx/druid}}
+data store designed for real-time exploratory analytics on large data sets.
+The system combines a column-oriented storage layout, a distributed,
+shared-nothing architecture, and an advanced indexing structure to allow for
+the arbitrary exploration of billion-row tables with sub-second latencies. In
+this paper, we describe Druid's architecture, and detail how it supports fast
+aggregations, flexible filters, and low latency data ingestion.
\end{abstract}
-\section{Introduction}
-Enterprises routinely collect diverse data sets that can contain up to terabytes of information per day. Companies are increasingly realizing the importance of efficiently storing and analyzing this data in order to increase both productivity and profitability. Numerous database systems (e.g., IBM’s Netezza \cite{singh2011introduction}, HP's Vertica \cite{bear2012vertica}, EMC’s Greenplum \cite{miner2012unified}) and several research papers \cite{barroso2009datacenter, chaudhuri1997overview, dewitt1992parallel} offer solutions for how to store and extract information from large data sets. However, many of these Relational Database Management Systems (RDBMS) and NoSQL architectures do not support interactive queries and real-time data ingestion.
+\section{Introduction}
+In recent years, the proliferation of internet technology has
+created a surge in machine-generated events. Individually, these
+events contain minimal useful information and are of low value. Given the
+time and resources required to extract meaning from large collections of
+events, many companies were willing to discard this data instead. Although
+infrastructure has been built to handle event-based data (e.g. IBM's
+Netezza\cite{singh2011introduction}, HP's Vertica\cite{bear2012vertica}, and EMC's
+Greenplum\cite{miner2012unified}), these offerings are largely sold at high price
+points and are only targeted towards those companies who can afford them.
-Metamarkets built Druid to directly address the need for a real-time analytical data store in the big-data ecosystem. Druid shares some similarities with main-memory databases \cite{farber2012sap} and interactive query systems such as Dremel \cite{melnik2010dremel} and PowerDrill \cite{hall2012processing}. Druid's focus is fast aggregations, arbitrarily deep data exploration, and low-latency data ingestion. Furthermore, Druid is highly configurable and allows users to easily adjust fault tolerance and performance properties. Queries on in-memory data typically complete in milliseconds, and real-time data ingestion means that new events are immediately available for analysis.
+A few years ago, Google introduced MapReduce \cite{dean2008mapreduce} as their
+mechanism of leveraging commodity hardware to index the internet and analyze
+logs. The Hadoop \cite{shvachko2010hadoop} project soon followed and was
+largely patterned after the insights that came out of the original MapReduce
+paper. Hadoop is currently deployed in many organizations to store and analyze
+large amounts of log data. Hadoop has contributed much to helping companies
+convert their low-value event streams into high-value aggregates for a variety
+of applications such as business intelligence and A-B testing.
-In this paper, we make the following contributions:
-\begin{itemize}
-\item We describe Druid’s real-time data ingestion implementation.
-\item We detail how the architecture enables fast multi-dimensional data exploration.
-\item We present Druid performance benchmarks.
-\end{itemize}
+As with a lot of great systems, Hadoop has opened our eyes to a new space of
+problems. Specifically, Hadoop excels at storing and providing access to large
+amounts of data; however, it does not make any performance guarantees around
+how quickly that data can be accessed. Furthermore, although Hadoop is a
+highly available system, performance degrades under heavy concurrent load.
+Lastly, while Hadoop works well for storing data, it is not optimized for
+ingesting data and making that data immediately readable.
-The outline is as follows: Section \ref{sec:data-model} describes the Druid data model. Section \ref{sec:cluster} presents an overview of the components of a Druid cluster. Section \ref{sec:query-api} outlines the query API. Section \ref{sec:storage} describes data storage format in greater detail. Section \ref{sec:robustness} discusses Druid robustness and failure responsiveness. Section \ref{sec:benchmarks} presents experiments benchmarking query performance. Section \ref{sec:related} discusses related work and Section \ref{sec:conclusions} presents our conclusions.
+Early on in the development of the Metamarkets product, we ran into each of
+these issues and came to the realization that Hadoop is a great back-office,
+batch processing, and data warehousing system. However, for a company that makes
+product-level guarantees around query performance and data availability in a
+highly concurrent environment (1000+ users), Hadoop wasn't going to meet our
+needs. We explored different solutions in the space, and after
+trying both Relational Database Management Systems and NoSQL architectures, we
+came to the conclusion that there was nothing in the open source world that
+could be fully leveraged for our requirements.
-\section{Data Model}
-\label{sec:data-model}
-The fundamental storage unit in Druid is the segment. Each table in Druid (called a \emph{data source})
-is partitioned into a collection of segments, each typically comprising 5--10 million rows. A sample table
-containing advertising data is shown in Table~\ref{tab:sample_data}. Many core Druid concepts can be described
-using this simple table.
+We ended up creating Druid, an open-source, distributed, column-oriented,
+real-time analytical data store. In many ways, Druid shares similarities with
+other interactive query systems \cite{melnik2010dremel}, main-memory databases
+\cite{farber2012sap}, and widely-known distributed data stores such as BigTable
+\cite{chang2008bigtable}, Dynamo \cite{decandia2007dynamo}, and Cassandra
+\cite{lakshman2010cassandra}. The distribution and query model also
+borrow ideas from current generation search infrastructure
+\cite{linkedin2013senseidb, apache2013solr, banon2013elasticsearch}.
+
+This paper describes the architecture of Druid, explores the various design
+decisions made in creating an always-on production system that powers a hosted
+service, and aims to inform anyone who faces a similar problem of a potential
+method of solving it. Druid is deployed in production at several technology companies\footnote{\href{http://druid.io/druid.html}{http://druid.io/druid.html}}.
+
+The structure of the paper is as follows: we first describe the problem in
+Section \ref{sec:problem-definition}. Next, we detail system architecture from
+the point of view of how data flows through the system in Section
+\ref{sec:architecture}. We then discuss how and why data gets converted into a
+binary format in Section \ref{sec:storage-format}. We briefly describe the
+query API in Section \ref{sec:query-api}. Lastly, we leave off with some
+benchmarks in Section \ref{sec:benchmarks}, related work in Section
+\ref{sec:related}, and conclusions in Section \ref{sec:conclusions}.
+
+\section{Problem Definition}
+\label{sec:problem-definition}
+
+Druid was originally designed to solve problems around ingesting and exploring
+large quantities of transactional events (log data). This form of timeseries data is
+commonly found in OLAP workflows and the nature of the data tends to be very
+append-heavy. For example, consider the data shown in
+Table~\ref{tab:sample_data}, which contains data for
+edits that have occurred on Wikipedia. Each time a user edits a page in
+Wikipedia, an event is generated that contains metadata about the edit. This
+metadata comprises three distinct components. First, there is a timestamp
+column indicating when the edit was made. Next, there is a set of dimension
+columns indicating various attributes about the edit, such as the page that was
+edited, the user who made the edit, and the location of the user. Finally,
+there is a set of metric columns that contain values (usually numeric) to
+aggregate over, such as the number of characters added or removed in an edit.
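+For illustration, the first event in Table~\ref{tab:sample_data} might arrive
+as a single JSON record similar to the following (the field names here are
+illustrative, not a fixed schema):
+\begin{verbatim}
+{
+  "timestamp" : "2011-01-01T01:00:00Z",
+  "page" : "Justin Bieber",
+  "username" : "Boxer",
+  "gender" : "Male",
+  "city" : "San Francisco",
+  "characters_added" : 1800,
+  "characters_removed" : 25
+}
+\end{verbatim}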
\begin{table*}
\centering
- \caption{Sample Druid data}
+ \caption{Sample Druid data for edits that have occurred on Wikipedia.}
\label{tab:sample_data}
\begin{tabular}{| l | l | l | l | l | l | l | l |}
\hline
- \textbf{Timestamp} & \textbf{Publisher} & \textbf{Advertiser} & \textbf{Gender} & \textbf{Country} & \textbf{Impressions} & \textbf{Clicks} & \textbf{Revenue} \\ \hline
- 2011-01-01T01:00:00Z & bieberfever.com & google.com & Male & USA & 1800 & 25 & 15.70 \\ \hline
- 2011-01-01T01:00:00Z & bieberfever.com & google.com & Male & USA & 2912 & 42 & 29.18 \\ \hline
- 2011-01-01T02:00:00Z & ultratrimfast.com & google.com & Male & USA & 1953 & 17 & 17.31 \\ \hline
- 2011-01-01T02:00:00Z & ultratrimfast.com & google.com & Male & USA & 3194 & 170 & 34.01 \\ \hline
+ \textbf{Timestamp} & \textbf{Page} & \textbf{Username} & \textbf{Gender} & \textbf{City} & \textbf{Characters Added} & \textbf{Characters Removed} \\ \hline
+ 2011-01-01T01:00:00Z & Justin Bieber & Boxer & Male & San Francisco & 1800 & 25 \\ \hline
+ 2011-01-01T01:00:00Z & Justin Bieber & Reach & Male & Waterloo & 2912 & 42 \\ \hline
+ 2011-01-01T02:00:00Z & Ke\$ha & Helz & Male & Calgary & 1953 & 17 \\ \hline
+ 2011-01-01T02:00:00Z & Ke\$ha & Xeno & Male & Taiyuan & 3194 & 170 \\ \hline
\end{tabular}
\end{table*}
-Druid always requires a timestamp column as a method of simplifying data distribution policies, data retention policies, and first-level query pruning. Druid partitions its data sources into well-defined time intervals, typically an hour or a day, and may further partition on values from other columns to achieve the desired segment size. Segments are uniquely identified by a data source
-identifer, the time interval of the data, a version string that increases whenever a new segment is created, and a partition number. This segment metadata is used by the system for concurrency control; read operations always access data in a particular time range
-from the segments with the latest version identifier for that time
-range.
+Our goal is to rapidly compute drill-downs and aggregates over this data. We
+want to answer questions like “How many edits were made on the page Justin
+Bieber from males in San Francisco?” and “What is the average number of
+characters that were added by people from Calgary over the span of a month?”. We also
+want queries over any arbitrary combination of dimensions to return with
+sub-second latencies.
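+
+To make this concrete, the first of these questions could be posed through
+Druid's JSON query API (described in Section~\ref{sec:query-api}) roughly as
+follows; the data source and dimension names here are purely illustrative:
+\begin{verbatim}
+{
+  "queryType"   : "timeseries",
+  "dataSource"  : "wikipedia",
+  "intervals"   : "2011-01-01/2011-01-02",
+  "granularity" : "all",
+  "filter" : {
+    "type" : "and",
+    "fields" : [
+      { "type" : "selector",
+        "dimension" : "page", "value" : "Justin Bieber" },
+      { "type" : "selector",
+        "dimension" : "gender", "value" : "Male" },
+      { "type" : "selector",
+        "dimension" : "city", "value" : "San Francisco" }
+    ]
+  },
+  "aggregations" : [
+    { "type" : "count", "name" : "edit_count" }
+  ]
+}
+\end{verbatim}
+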
-Most segments in a Druid cluster are immutable \emph{historical} segments. Such segments are persisted on local disk or in a distributed filesystem ("deep" storage) such as S3 \cite{decandia2007dynamo} or HDFS \cite{shvachko2010hadoop}. All historical
-segments have associated metadata describing properties of the segment
-such as size in bytes, compression format, and location in deep
-storage. Data for intervals covered by historical segments can be updated by creating new historical segments that obsolete the old ones.
+The need for Druid was facilitated by the fact that existing open source
+Relational Database Management Systems and NoSQL key/value stores were unable
+to provide a low latency data ingestion and query platform for interactive
+applications \cite{tschetter2011druid}. In the early days of Metamarkets, the
+company was focused on building a web-based dashboard that would allow users to
+arbitrarily explore and visualize event streams. Interactivity was very important
+to us; we didn't want our users sitting around waiting for their data
+visualizations to update.
-Segments covering very recent intervals are mutable \emph{real-time} segments. Real-time segments are incrementally updated as new events are ingested, and are available for queries throughout the incremental indexing process. Periodically, real-time segments are converted into
-historical segments through a finalization and handoff process described in Section~\ref{sec:realtime}.
+In addition to the query latency needs, the system had to be multi-tenant and
+highly available. Downtime is costly and many businesses cannot afford to wait
+if a system is unavailable in the face of software upgrades or network failure.
+Downtime for startups, many of which have no internal operations teams, can
+mean the difference between business success and failure.
-Druid is best used for aggregating event streams, and both historical and real-time segments are built through an incremental indexing process that takes advantage of this assumption. Incremental indexing works by computing running aggregates of interesting metrics (e.g. number of impressions, sum of revenue from the data in Table~\ref{tab:sample_data}) across all rows that have identical attributes (e.g. publisher, advertiser). This often produces an order of magnitude compression in the data without sacrificing analytical value. Of course, this comes at the cost of not being able to support queries over the non-aggregated metrics.
+Finally, another key problem that Metamarkets faced in the early stages of the
+company was allowing users and alerting systems to make business
+decisions in real-time. The time from when an event was created to when that
+event could be queried determined how fast users and systems were able to react
+to potentially catastrophic occurrences in their systems.
-\section{Cluster}
-\label{sec:cluster}
-A Druid cluster consists of different types of nodes, each performing
-a specific function. The composition of a Druid cluster is shown in
-Figure~\ref{fig:druid_cluster}.
+The problems of data exploration, ingestion, and availability span multiple
+industries. Since Druid was open sourced in October 2012, it has been deployed as a
+video, network monitoring, operations monitoring, and advertising analytics
+platform.
+
+\section{Architecture}
+\label{sec:architecture}
+A Druid cluster consists of different types of nodes and each node type is
+designed to perform a very specific set of tasks. We believe this design
+separates functionality concerns and simplifies the overall complexity of the
+system. There is minimal interaction
+between the different node types and hence, intra-cluster communication
+failures have minimal impact on data availability. The different node types
+operate fairly independently of each other. To solve complex data analysis
+problems, the node types come together to form a fully working system. The
+name Druid comes from the Druid class in many role-playing games: it is a
+shape-shifter, capable of taking many different forms to fulfill various roles
+in a group. The composition and flow of data of a Druid
+cluster are shown in Figure~\ref{fig:cluster}.
\begin{figure*}
\centering
-\includegraphics[width = 4.5in]{druid_cluster}
-\caption{An overview of a Druid cluster.}
-\label{fig:druid_cluster}
+\includegraphics[width = 4.5in]{cluster}
+\caption{An overview of a Druid cluster and the flow of data through the cluster.}
+\label{fig:cluster}
\end{figure*}
-Recall that the Druid data model has the notion of historical and real-time segments. The Druid cluster is architected to reflect this
-conceptual separation of data. Real-time nodes are responsible for
-ingesting, storing, and responding to queries for the most recent
-events. Similarly, historical historical nodes are responsible for
-loading and responding to queries for historical events.
+\subsection{Real-time Nodes}
+\label{sec:realtime}
+Real-time nodes encapsulate the functionality to ingest and query real-time
+event streams. Events indexed via these nodes are immediately available for
+querying. The nodes are only concerned with events for some small time range
+and periodically hand off immutable batches of events they've collected over
+this small time range to other nodes in the Druid cluster that are specialized
+in dealing with batches of immutable events.
-Data in Druid is stored on storage nodes. Storage nodes can be either
-compute or real-time nodes. Queries to access this data will
-typically first hit a layer of broker nodes. Broker nodes are
-responsible for finding and routing queries down to the storage nodes
-that host the pertinent data. The storage nodes compute their portion
-of the query response in parallel and return their results to the
-brokers. Broker nodes, historical nodes, and realtime nodes are jointly
-classified as queryable nodes.
+Real-time nodes maintain an in-memory index buffer for all incoming events.
+These indexes are incrementally populated as new events are ingested and the
+indexes are also directly queryable. Druid virtually behaves as a row store
+for queries on events that exist in this JVM heap-based buffer. To avoid heap overflow
+problems, real-time nodes persist their in-memory indexes to disk either
+periodically or after some maximum row limit is reached. This persist process
+converts data stored in the in-memory buffer to the column-oriented storage
+format described in Section~\ref{sec:storage-format}. Each persisted index is immutable and
+real-time nodes load persisted indexes into off-heap memory such that they can
+still be queried.
-Druid also has a set of coordination nodes to manage load assignment,
-distribution, and replication. Coordination nodes are not queryable
-and instead focus on maintaining cluster stability. Coordination nodes
-have an external dependency on a MySQL database.
+Real-time nodes maintain a consolidated view of their in-memory index and of
+all indexes persisted to disk. This unified view allows all indexes on a node
+to be queried. On a periodic basis, each node will schedule a background task
+that searches for all locally persisted indexes. The task merges these indexes
+together and builds an immutable block of data that contains all the events
+that have been ingested by a real-time node for some span of time. We refer to this
+block of data as a ``segment''. During the hand-off stage, a real-time node
+uploads this segment to permanent backup storage, typically a distributed
+file system such as S3 \cite{decandia2007dynamo} or HDFS
+\cite{shvachko2010hadoop}, which Druid refers to as ``deep storage''. The ingest,
+persist, merge, and handoff steps are fluid; there is no data loss during this
+process. Figure~\ref{fig:realtime_flow} illustrates the process.
-Druid relies on Apache Zookeeper \cite{hunt2010zookeeper}
-for coordination. Most intra-cluster communication is over Zookeeper, although
-queries are typically forwarded over HTTP.
+\begin{figure}
+\centering
+\includegraphics[width = 2.8in]{realtime_flow}
+\caption{Real-time nodes first buffer events in memory. After some period of
+time, in-memory indexes are persisted to disk. After another period of time,
+all persisted indexes are merged together and handed off. Queries on data hit
+both the in-memory index and the persisted indexes.}
+\label{fig:realtime_flow}
+\end{figure}
-\subsection{Historical historical nodes}
-Historical historical nodes are the main workers of a Druid cluster and
-are self-contained and self-sufficient. historical nodes load historical
-segments from permanent/deep storage and expose them for
-querying. There is no single point of contention between the nodes and
-nodes have no knowledge of one another. historical nodes are
-operationally simple; they only know how to perform the tasks they are
-assigned. To help other services discover historical nodes and the data
-they hold, every historical node maintains a constant Zookeeper
-connection. historical nodes announce their online state and the segments
-they serve by creating ephemeral nodes under specifically configured
-Zookeeper paths. Instructions for a given historical node to load new
-segments or drop existing segments are sent by creating ephemeral
-znodes under a special “load queue” path associated with the compute
-node. Figure~\ref{fig:zookeeper} illustrates a simple historical node and Zookeeper interaction.
-Each historical node announces themselves under an "announcements" path when they come online
-and each compute has a load queue path associated with it.
+Real-time nodes leverage Zookeeper \cite{hunt2010zookeeper} for coordination
+with the rest of the Druid cluster. The nodes announce their online state and
+the data they are serving in Zookeeper. To better understand the flow of data
+through a real-time node, consider the following example. First, we start a
+real-time node at 13:37. The node will announce that it is serving a segment of
+data for a period of time from 13:00 to 14:00 and will only accept events with
+timestamps in this time range. Every 10 minutes (the persist period is
+configurable), the node will flush and persist its in-memory buffer to disk.
+Near the end of the hour, the node will likely see events with timestamps from
+14:00 to 15:00. When this occurs, the real-time node prepares to serve data for
+the next hour by creating a new in-memory index and announces that it is also
+serving a segment for data from 14:00 to 15:00. The node does not immediately
+merge the indexes it persisted from 13:00 to 14:00, instead it waits for a
+configurable window period for straggling events from 13:00 to 14:00 to come
+in. Having a window period minimizes the risk of data loss from delays in event
+delivery. At the end of the window period, the real-time node merges all
+persisted indexes from 13:00 to 14:00 into a single immutable segment and hands
+the segment off. Once this segment is loaded and queryable somewhere else in
+the Druid cluster, the real-time node flushes all information about the data it
+has collected for 13:00 to 14:00 and unannounces that it is serving this data. This
+process is shown in Figure~\ref{fig:realtime_timeline}.
+
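+The behavior in this example is governed by a small number of configurable
+periods on the real-time node. A minimal sketch of the relevant settings is
+shown below; the property names and values are illustrative rather than a
+complete ingestion specification:
+\begin{verbatim}
+{
+  "segmentGranularity"        : "hour",
+  "intermediatePersistPeriod" : "PT10M",
+  "windowPeriod"              : "PT10M"
+}
+\end{verbatim}
+With such settings, a node builds one segment per hour, persists its in-memory
+buffer every ten minutes, and waits an additional window period for straggling
+events before merging and handing off the hour's data.
+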
+\begin{figure*}
+\centering
+\includegraphics[width = 4.5in]{realtime_timeline}
+\caption{A timeline that represents the typical operations a real-time node
+undergoes. The node starts, ingests data, persists, and periodically hands data
+off. This process repeats indefinitely. The time intervals between different
+real-time node operations are configurable.}
+\label{fig:realtime_timeline}
+\end{figure*}
+
+\subsubsection{Availability and Scalability}
+Real-time nodes are a consumer of data and require a corresponding producer to
+provide the data stream. Typically, for data durability purposes, a message
+bus such as Kafka \cite{kreps2011kafka} sits between the producer and the
+real-time node as shown in Figure~\ref{fig:realtime_pipeline}. Real-time nodes
+ingest data by reading events from the message bus. The time from event
+creation to event consumption is typically on the order of hundreds of
+milliseconds.
\begin{figure}
\centering
-\includegraphics[width = 2.8in]{zookeeper}
-\caption{historical nodes create ephemeral znodes under specifically configured Zookeeper paths.}
-\label{fig:zookeeper}
+\includegraphics[width = 2.8in]{realtime_pipeline}
+\caption{Multiple real-time nodes can read from the same message bus. Each node maintains its own offset.}
+\label{fig:realtime_pipeline}
\end{figure}
-To expose a segment for querying, a historical node must first possess a
-local copy of the segment. Before a historical node downloads a segment
-from deep storage, it first checks a local disk directory (cache) to
-see if the segment already exists in local storage. If no cache
-information about the segment is present, the historical node will
-download metadata about the segment from Zookeeper. This metadata
-includes information about where the segment is located in deep
-storage and about how to decompress and process the segment. Once a
-historical node completes processing a segment, the node announces (in
-Zookeeper) that it is serving the segment. At this point, the segment
-is queryable.
+The purpose of the message bus in Figure~\ref{fig:realtime_pipeline} is two-fold.
+First, the message bus acts as a buffer for incoming events. A message bus such
+as Kafka maintains offsets indicating the position in an event stream that a
+consumer (a real-time node) has read up to and consumers can programmatically
+update these offsets. Typically, real-time nodes update this offset each time
+they persist their in-memory buffers to disk. This means that in a fail and
+recover scenario, if a node has not lost its disk, it can reload all persisted
+indexes from disk and continue reading events from the last offset it
+committed. Ingesting events from a recently committed offset greatly reduces a
+node's recovery time. In practice, we see real-time nodes recover from such
+failure scenarios on the order of seconds.
+The second purpose of the message bus is to act as a single endpoint from which
+multiple real-time nodes can read events. Multiple real-time nodes can ingest
+the same set of events from the bus, thus replicating the events. In
+a scenario where a node completely fails and does not recover, replicated
+streams ensure that no data is lost. A single ingestion endpoint also allows
+data streams to be partitioned such that multiple real-time nodes may each
+ingest a portion of a stream. This allows additional real-time nodes to be
+seamlessly added. In practice, this model has allowed the largest production
+Druid cluster that runs real-time nodes to consume raw data at
+approximately 500 MB/s (150,000 events/s or 2 TB/hour).
+
+\subsection{Historical Nodes}
+Historical nodes encapsulate the functionality to load and serve the immutable
+blocks of data (segments) created by real-time nodes. In many real-world workflows, most
+of the data loaded in a Druid cluster is immutable and hence, historical nodes
+are typically the main workers of a Druid cluster. Historical nodes follow a
+shared-nothing architecture and there is no single point of contention among
+the nodes. The nodes have no knowledge of one another and are operationally
+simple; they only know how to load, drop, and serve immutable segments.
+
+Similar to real-time nodes, historical nodes announce their online state and
+the data they are serving in Zookeeper. Instructions to load and drop segments
+are sent over Zookeeper and contain information about where the segment is
+located in deep storage and about how to decompress and process the segment.
+Before a historical node downloads a particular segment from deep storage, it
+first checks a local cache that maintains information about what segments
+already exist on the node. If information about a segment is not present, the
+historical node will proceed to download the segment from deep storage. This
+process is shown in Figure~\ref{fig:historical_download}. Once processing is
+complete, the availability of the segment is announced. At this point, the
+segment is queryable. The local cache also allows for historical nodes to be
+quickly updated and restarted. On startup, the node examines its cache and
+immediately serves whatever data it finds.
+
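+Concretely, a load instruction carries a descriptor for the segment. A
+simplified sketch of such a descriptor for a segment stored in S3 is shown
+below; the field names and values are illustrative:
+\begin{verbatim}
+{
+  "dataSource" : "wikipedia",
+  "interval"   :
+    "2011-01-01T01:00:00Z/2011-01-01T02:00:00Z",
+  "version"    : "2011-01-02T00:00:00Z",
+  "loadSpec"   : {
+    "type"   : "s3_zip",
+    "bucket" : "deep-storage-bucket",
+    "key"    : "wikipedia/2011-01-01T01/index.zip"
+  },
+  "size"       : 463027
+}
+\end{verbatim}
+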
+\begin{figure}
+\centering
+\includegraphics[width = 2.8in]{historical_download}
+\caption{Historical nodes download immutable segments from deep storage.}
+\label{fig:historical_download}
+\end{figure}
+
+Historical nodes can support read consistency because they only deal with
+immutable data. Immutable data blocks also enable a simple parallelization
+model: historical nodes can scan and aggregate immutable blocks concurrently
+without blocking.
+
\subsubsection{Tiers}
\label{sec:tiers}
-historical nodes can be grouped in different tiers, where all nodes in a
+Historical nodes can be grouped in different tiers, where all nodes in a
given tier are identically configured. Different performance and
fault-tolerance parameters can be set for each tier. The purpose of
tiered nodes is to enable higher or lower priority segments to be
distributed according to their importance. For example, it is possible
to spin up a “hot” tier of historical nodes that have a high number of
-cores and a large RAM capacity. The “hot” cluster can be configured to
-download more frequently accessed segments. A parallel “cold” cluster
+cores and large memory capacity. The “hot” cluster can be configured to
+download more frequently accessed data. A parallel “cold” cluster
can also be created with much less powerful backing hardware. The
“cold” cluster would only contain less frequently accessed segments.
-\subsection{Real-time Nodes}
-\label{sec:realtime}
-Real-time nodes encapsulate the functionality to ingest and query
-real-time data streams. Data indexed via these nodes is immediately
-available for querying. Real-time nodes are a consumer of data and
-require a corresponding producer to provide the data
-stream. Typically, for data durability purposes, a message bus such as
-Kafka \cite{kreps2011kafka} sits between the producer and the real-time node as shown
-in Figure~\ref{fig:data_reading}.
-
-The purpose of the message bus in Figure~\ref{fig:data_reading} is to act as a buffer for
-incoming events. The message bus can maintain offsets indicating the
-position in an event stream that a real-time node has read up to and
-real-time nodes can update these offsets periodically. The message bus also acts as backup storage for recent events.
-Real-time nodes ingest data by reading events from the message bus. The time from event creation to message bus storage to
-event consumption is on the order of hundreds of milliseconds.
-
-Real-time nodes maintain an in-memory index buffer for all incoming
-events. These indexes are incrementally populated as new events appear on the message bus. The indexes are also directly queryable.
-Real-time nodes persist their indexes to disk either periodically or after some maximum row limit is
-reached. After each persist, a real-time node updates the message bus
-with the offset of the last event of the most recently persisted
-index. Each persisted index is immutable. If a real-time node fails and recovers, it can simply reload
-any indexes that were persisted to disk and continue reading the
-message bus from the point the last offset was committed. Periodically committing offsets reduces the number of messages a real-time
-node has to rescan after a failure scenario.
-
-Real-time nodes maintain a consolidated view of the currently updating
-index and of all indexes persisted to disk. This unified view allows
-all indexes on a node to be queried. On a periodic basis, the nodes will
-schedule a background task that searches for all persisted indexes of
-a data source. The task merges these indexes together and builds a
-historical segment. The nodes will upload the segment to deep storage
-and provide a signal for the historical historical nodes to begin serving
-the segment. The ingest, persist, merge, and handoff steps are fluid;
-there is no data loss as a real-time node converts a real-time segment
-to a historical one. Figure~\ref{fig:data-durability} illustrates the process.
-
-Similar to historical nodes, real-time nodes announce segments in
-Zookeeper. Unlike historical segments, real-time segments may
-represent a period of time that extends into the future. For example,
-a real-time node may announce it is serving a segment that contains
-data for the current hour. Before the end of the hour, the real-time
-node continues to collect data for the hour. Every 10 minutes (the
-persist period is configurable), the node will flush and persist its
-in-memory index to disk. At the end of the current hour, the real-time
-node prepares to serve data for the next hour by creating a new index
-and announcing a new segment for the next hour. The node does not
-immediately merge and build a historical segment for the previous hour
-until after some window period has passed. Having a window period
-allows for straggling data points to come in and minimizes the risk of
-data loss. At the end of the window period, the real-time node will
-merge all persisted indexes, build a historical segment for the
-previous hour, and hand the segment off to historical nodes to
-serve. Once the segment is queryable on the historical nodes, the
-real-time node flushes all information about the segment and
-unannounces it is serving the segment.
-
-Real-time nodes are highly scalable. If the data volume and ingestion
-rates for a given data source exceed the maximum capabilities of a
-single node, additional nodes can be added. Multiple nodes can
-consume events from the same stream, and every individual node only
-holds a portion of the total number of events. This creates natural
-partitions across nodes. Each node announces the real-time segment it
-is serving and each real-time segment has a partition number. Data
-from individual nodes will be merged at the broker level. To our
-knowledge, the largest production level real-time Druid cluster is
-consuming approximately 500MB/s (150,000 events/s or 2 TB/hour of raw data).
-
-\begin{figure}
-\centering
-\includegraphics[width = 2.8in]{data_reading}
-\caption{Real-time data ingestion.}
-\label{fig:data_reading}
-\end{figure}
-
-\begin{figure}
-\centering
-\includegraphics[width = 2.8in]{druid_realtime_flow}
-\caption{Real-time data durability}
-\label{fig:data-durability}
-\end{figure}
+\subsubsection{Availability}
+Historical nodes depend on Zookeeper for segment load and unload instructions.
+If Zookeeper becomes unavailable, historical nodes are no longer able to serve
+new data or drop outdated data; however, because queries are served over
+HTTP, historical nodes are still able to respond to query requests for
+the data they are currently serving. This means that Zookeeper outages do not
+affect data availability on historical nodes.
\subsection{Broker Nodes}
-Broker nodes act as query routers to other queryable nodes such as
-compute and real-time nodes. Broker nodes understand the metadata
-published in Zookeeper about what segments exist and on what nodes the
-segments are stored. Broker nodes route incoming queries such that the queries hit
-the right storage nodes. Broker nodes also merge partial results from
-storage nodes before returning a final consolidated result to the
-caller. Additionally, brokers provide an extra level of data
-durability as they maintain a cache of recent results. In the event
-that multiple storage nodes fail and all copies of a segment are
-somehow lost, it is still possible that segment results can still be
-returned if that information exists in the cache.
-
-\subsubsection{Timeline}
-To determine the correct nodes to forward queries to, brokers
-first build a view of the world from information in Zookeeper. Recall
-that Druid uses Zookeeper to maintain information about all compute
-and real-time nodes in a cluster and the segments those nodes are
-serving. For every data source in Zookeeper, the broker node builds a
-timeline of segments for the data source and the nodes that serve them. A timeline
-consists of segments and represents which segments contain data for
-what ranges of time. Druid may have multiple segments where the data
-source and interval are the same but versions differ. The timeline
-view will always surface segments with the most recent version
-identifier for a time range. If two segments intervals overlap, the segment with the more recent
-version always has precedence. When queries are received for a specific
-data source and interval, the broker node performs a lookup on the
-timeline associated with the query data source for the query interval
-and retrieves the segments that contain data for the query. The broker
-node maps these segments to the storage nodes that serve them and
-forwards the query down to the respective nodes.
-
+Broker nodes act as query routers to historical and real-time nodes. Broker
+nodes understand the metadata published in Zookeeper about what segments are
+queryable and where those segments are located. Broker nodes route incoming queries
+such that the queries hit the right historical or real-time nodes. Broker nodes
+also merge partial results from historical and real-time nodes before returning
+a final consolidated result to the caller.
\subsubsection{Caching}
\label{sec:caching}
-Broker nodes employ a distributed cache with a LRU \cite{o1993lru,
-kim2001lrfu} cache invalidation strategy. The broker cache stores
-per segment results. The cache can be local to each broker node or
-shared across multiple nodes using an external distributed cache
-such as memcached \cite{fitzpatrick2004distributed}. Recall that each time a broker node receives a
-query, it first maps the query to a set of segments. A subset of
-these segment results may already exist in the cache and the results
-can be directly pulled from the cache. For any segment results that
-do not exist in the cache, the broker node will forward the query
-to the historical nodes. Once the historical nodes return their results,
-the broker will store those results in the cache. Real-time segments
-are never cached and hence requests for real-time data will always
-be forwarded to real-time nodes. Real-time data is perpetually
-changing and caching the results would be unreliable.
+Broker nodes contain a cache with an LRU \cite{o1993lru, kim2001lrfu} cache
+invalidation strategy. The cache can use local heap memory or an external
+distributed store such as memcached \cite{fitzpatrick2004distributed}. Each
+time a broker node receives a query, it first maps the query to a set of
+segments. Results for certain segments may already exist in the cache and there
+is no need to recompute them. For any results that do not exist in the cache,
+the broker node will forward the query to the historical and real-time nodes.
+Once the historical nodes return their results, the broker will cache these
+results on a per segment basis for future use. This process is illustrated in
+Figure~\ref{fig:caching}. Real-time data is never cached and hence requests for
+real-time data will always be forwarded to real-time nodes. Real-time data is
+perpetually changing and caching the results would be unreliable.
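+
+Schematically, the broker cache can be thought of as a map from (query,
+segment) pairs to per-segment partial results:
+\begin{verbatim}
+(query, segment id) -> per-segment result
+\end{verbatim}
+so a repeated query only needs to recompute results for the segments whose
+entries are missing from the cache.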
+\begin{figure*}
+\centering
+\includegraphics[width = 4.5in]{caching}
+\caption{Broker nodes cache per segment results. Every Druid query is mapped to
+a set of segments. If segment results do not live in the cache, queries are
+forwarded down to historical and real-time nodes.}
+\label{fig:caching}
+\end{figure*}
-\subsection{Coordination (Master) Nodes}
-The Druid coordination or master nodes are primarily in charge of
-segment management and distribution. The Druid master is responsible
-for loading new segments, dropping outdated segments, managing segment
-replication, and balancing segment load. Druid uses a multi-version
-concurrency control swapping protocol for managing segments in order
-to maintain stable views.
+The cache also acts as an additional level of data durability. In the event
+that all historical nodes fail, it is still possible to query results if those
+results already exist in the cache.
-The Druid master runs periodically to determine the current state of
-the cluster. It makes decisions by comparing the expected state of the
-cluster with the actual state of the cluster at the time of the
-run. As with all Druid nodes, the Druid master maintains a connection
-to Zookeeper for current cluster information. The master also
-maintains a connection to a MySQL database that contains additional
-operational parameters and configurations. One of the key pieces of
-information located in the MySQL database is a segment table that
-contains a list of historical segments that should be served. This
-table can be updated by any service that creates historical
-segments. The MySQL database also contains a rule table that governs
-how segments are created, destroyed, and replicated in the cluster.
+\subsubsection{Availability}
+In the event of a total Zookeeper outage, data is still queryable. If broker
+nodes are unable to communicate with Zookeeper, they use their last known
+segment-to-node mapping and continue forwarding queries down to real-time and
+historical nodes. Broker nodes make the assumption that the structure of the
+cluster is the same as it was before the outage. In practice, this availability
+model has allowed our Druid cluster to continue serving queries for several
+hours while we diagnosed Zookeeper outages.
-The master does not directly communicate with a historical node when
-assigning it work; instead the master creates an ephemeral znode in
-Zookeeper containing information about what the historical node should
-do. The historical node maintains a similar connection to Zookeeper to
-monitor for new work.
+\subsection{Coordinator Nodes}
+The Druid coordinator nodes are primarily in charge of data management and
+distribution on historical nodes. The coordinator nodes tell historical nodes
+to load new data, drop outdated data, replicate data, and move data for load
+balancing. Druid uses a multi-version concurrency control swapping protocol for
+managing immutable segments in order to maintain stable views. If any
+immutable segment contains data that is wholly obsoleted by newer segments, the
+outdated segment is dropped from the cluster. Coordinator nodes undergo a
+leader-election process that determines a single node that runs the coordinator
+functionality. The remaining coordinator nodes act as redundant backups.
+
+A coordinator node runs periodically to determine the current state of the
+cluster. It makes decisions by comparing the expected state of the cluster with
+the actual state of the cluster at the time of the run. As with all Druid
+nodes, coordinator nodes maintain a Zookeeper connection for current cluster
+information. The coordinator nodes also maintain a connection to a MySQL
+database that contains additional operational parameters and configurations.
+One of the key pieces of information located in the MySQL database is a table
+that contains a list of all segments that should be served by historical nodes.
+This table can be updated by any service that creates segments, for example,
+real-time nodes. The MySQL database also contains a rule table that governs how
+segments are created, destroyed, and replicated in the cluster.
\subsubsection{Rules}
-Rules govern how historical segments are loaded and dropped from the cluster.
-Rules indicate how segments should be assigned to
-different historical node tiers and how many replicates of a segment
-should exist in each tier. Rules may also indicate when segments
-should be dropped entirely from the cluster. Rules are usually set for a period of time.
-For example, a user may use rules to load the most recent one month's worth of segments into a "hot" cluster,
-the most recent one year's worth of segments into a "cold" cluster, and drop any segments that are older.
+Rules govern how historical segments are loaded and dropped from the cluster.
+Rules indicate how segments should be assigned to different historical node
+tiers and how many replicates of a segment should exist in each tier. Rules may
+also indicate when segments should be dropped entirely from the cluster. Rules
+are usually set for a period of time. For example, a user may use rules to
+load the most recent one month's worth of segments into a ``hot'' cluster, the
+most recent one year's worth of segments into a ``cold'' cluster, and drop any
+segments that are older.
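+
+For instance, the retention policy described above might be expressed with a
+rule set along the following lines; the rule types and field names are shown
+for illustration:
+\begin{verbatim}
+[
+  { "type" : "loadByPeriod", "period" : "P1M", "tier" : "hot"  },
+  { "type" : "loadByPeriod", "period" : "P1Y", "tier" : "cold" },
+  { "type" : "dropForever" }
+]
+\end{verbatim}
+Here, segments older than one year match only the final drop rule.
+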
-The master loads a set of
-rules from a rule table in the MySQL database. Rules may be specific
-to a certain data source and/or a default set of rules may be
-configured. The master will cycle through all available segments and
-match each segment with the first rule that applies to it.
+The coordinator nodes load a set of rules from a rule table in the MySQL
+database. Rules may be specific to a certain data source and/or a default set
+of rules may be configured. The coordinator nodes will cycle through all available
+segments and match each segment with the first rule that applies to it.
\subsubsection{Load Balancing}
-In a typical production environment, queries often hit dozens or even
-hundreds of data segments. Since each historical node has limited
-resources, historical segments must be distributed among the cluster
-to ensure that the cluster load is not too imbalanced. Determining
-optimal load distribution requires some knowledge about query patterns
-and speeds. Typically, queries cover recent data spanning contiguous
-time intervals for a single data source. On average, queries that
+In a typical production environment, queries often hit dozens or even hundreds
+of segments. Since each historical node has limited resources, segments must be
+distributed among the cluster to ensure that the cluster load is not too
+imbalanced. Determining optimal load distribution requires some knowledge about
+query patterns and speeds. Typically, queries cover recent segments spanning
+contiguous time intervals for a single data source. On average, queries that
access smaller segments are faster.
-These query patterns suggest replicating recent historical segments at
-a higher rate, spreading out large segments that are close in time to
-different historical nodes, and co-locating segments from different data
-sources. To optimally distribute and balance segments among the
-cluster, we developed a cost-based optimization procedure that takes
-into account the segment data source, recency, and size. The exact
-details of the algorithm are beyond the scope of this paper and may be discussed in future literature.
+These query patterns suggest replicating recent historical segments at a higher
+rate, spreading out large segments that are close in time to different
+historical nodes, and co-locating segments from different data sources. To
+optimally distribute and balance segments among the cluster, we developed a
+cost-based optimization procedure that takes into account the segment data
+source, recency, and size. The exact details of the algorithm are beyond the
+scope of this paper and may be discussed in future literature.
-\section{Query API}
-\label{sec:query-api}
-Druid has its own query language and accepts queries as POST requests. All queryable
-Druid nodes share the same query API.
+\subsubsection{Replication}
+Coordinator nodes may tell different historical nodes to load copies of the
+same segment. The number of replicates in each tier of the historical
+cluster is fully configurable. Setups that require high levels of fault
+tolerance can be configured to have a high number of replicates. Replicated
+segments are treated the same as the originals and follow the same load
+distribution algorithms. By replicating segments, single historical node
+failures are transparent in the Druid cluster. We use this property to our
+advantage for software upgrades. We can seamlessly take a historical node
+offline, update it, bring it back up, and repeat the process for every
+historical node in a cluster. Over the last two years, we have never taken
+downtime in our Druid cluster for software upgrades.
-The body of the POST request is
-a JSON object containing key-value pairs specifying various query
-parameters. A typical query will contain the data source name, the
-granularity of the result data, time range of interest, the
-type of request, and the metrics to aggregate over. The result will also be a JSON object
-containing the aggregated metrics over the time period.
+\subsubsection{Availability}
+Druid coordinator nodes have two external dependencies: Zookeeper and MySQL.
+Coordinator nodes rely on Zookeeper to determine what historical nodes already
+exist in the cluster. If Zookeeper becomes unavailable, the coordinator will no
+longer be able to assign, balance, and drop segments. However, these operations
+do not affect data availability; all data in the historical cluster remains
+queryable.
-Most query types will also support a filter set. A filter set is a Boolean expression of dimension name and value
-pairs. Any number and combination of dimensions and values may be specified.
-When a filter set is provided, only the subset of the data that pertains to the filter set will be scanned.
-The ability to handle complex nested filter sets is what enables Druid
-to drill into data at any depth.
+The design principle for responding to MySQL and Zookeeper failures is the
+same: if an external dependency responsible for coordination fails, the cluster
+maintains the status quo. Druid uses MySQL to store operational management
+information and segment metadata information about what segments should exist
+in the cluster. If MySQL goes down, this information becomes unavailable to
+coordinator nodes. However, this does not mean the data itself becomes unavailable. If
+coordinator nodes cannot communicate with MySQL, they will cease to assign new
+segments and drop outdated ones. Historical and real-time nodes are still
+queryable during MySQL outages.
-The exact query syntax depends on the query type and the information requested.
-A sample time series query is shown below:
-\begin{verbatim}
-{
- "queryType" : "timeseries",
- "dataSource" : "sample_data",
- "intervals" : "2013-01-01/2013-01-02",
- "filter" : {
- "type" : "selector",
- "dimension" : "poets",
- "value" : "Ke$ha"
- },
- "granularity" : "day",
- "aggregations" : [
- {
- "type" : "count",
- "fieldName" : "row",
- "name" : "row"
- }
- ]
-}
-\end{verbatim}
+\section{Storage Format}
+\label{sec:storage-format}
+Data tables in Druid (called \emph{data sources}) are collections of
+timestamped events and partitioned into a set of segments, where each segment
+is typically 5--10 million rows. Formally, we define a segment as a collection
+of rows of data that span some period in time. Segments represent the
+fundamental storage unit in Druid and replication and distribution are done at
+a segment level.
+
+Druid always requires a timestamp column as a method of simplifying data
+distribution policies, data retention policies, and first-level query pruning.
+Druid partitions its data sources into well-defined time intervals, typically
+an hour or a day, and may further partition on values from other columns to
+achieve the desired segment size. For example, partitioning the data in
+Table~\ref{tab:sample_data} by hour results in two segments for 2011-01-01, and
+partitioning the data by day results in a single segment. The time granularity
+to partition segments is a function of data volume and time range. A data set
+with timestamps spread over a year is better partitioned by day, and a data set
+with timestamps spread over a day is better partitioned by hour.
-It is beyond the scope of this paper to fully describe the query API.
-We are also in the process of extending the Druid API to understand SQL.
+Segments are uniquely identified by a data source identifier, the time interval
+of the data, and a version string that increases whenever a new segment is
+created. The version string indicates the freshness of segment data; segments
+with later versions have newer views of data (over some time range) than
+segments with older versions. This segment metadata is used by the system for
+concurrency control; read operations always access data in a particular time
+range from the segments with the latest version identifiers for that time
+range.
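+
+For example, assuming the Wikipedia edits above are stored in a data source
+named \texttt{wikipedia}, the segment covering the first hour of data might be
+identified by concatenating the data source, interval, and version as follows;
+the exact formatting of the identifier is illustrative:
+\begin{verbatim}
+wikipedia_2011-01-01T01:00:00Z_2011-01-01T02:00:00Z_
+  2011-01-02T00:00:00Z
+\end{verbatim}
+where the final component is the version string indicating when the segment
+was created.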
-\section{Storage}
-\label{sec:storage}
-Druid is a column-oriented data store. When considering aggregates
-over a large number of events, the advantages storing data as columns
-rather than rows are well documented \cite{cattell2011scalable}. Column storage allows for
-more efficient CPU usage as only what is needed is actually loaded and
-scanned. In a row oriented data store, all columns associated with a
-row must be scanned as part of an aggregation. The additional scan
-time can introduce performance degradations as high as 250\% \cite{bear2012vertica}.
+Druid segments are stored in a column orientation. Given that Druid is best
+used for aggregating event streams (all data going into Druid must have a
+timestamp), the advantages of storing aggregate information as columns rather than
+rows are well documented \cite{abadi2008column}. Column storage allows for more
+efficient CPU usage as only what is needed is actually loaded and scanned. In a
+row oriented data store, all columns associated with a row must be scanned as
+part of an aggregation. The additional scan time can introduce performance
+degradations as high as 250\% \cite{bear2012vertica}.
-\subsection{Column Types}
-Druid has multiple column types to represent the various column value
-formats. Depending on the column type, different compression methods
-are used to reduce the cost of storing a column in memory and on
-disk. In the example given in Table~\ref{tab:sample_data}, the
-publisher, advertiser, gender, and country columns only contain
-strings. String columns can be dictionary encoded. Dictionary encoding
-is a common method to compress data and has been used in other data
-stores such as PowerDrill \cite{hall2012processing}. In the example in
+Druid has multiple column types to represent various data formats. Depending on
+the column type, different compression methods are used to reduce the cost of
+storing a column in memory and on disk. In the example given in
+Table~\ref{tab:sample_data}, the page, user, gender, and city columns only
+contain strings. String columns can be dictionary encoded. Dictionary encoding
+is a common method to compress data and has been used in other data stores such
+as PowerDrill \cite{hall2012processing}. In the example in
-Table~\ref{tab:sample_data}, we can map each publisher to an unique
+Table~\ref{tab:sample_data}, we can map each page to a unique
integer identifier.
\begin{verbatim}
-bieberfever.com -> 0
-ultratrimfast.com -> 1
+Justin Bieber -> 0
+Ke$ha -> 1
\end{verbatim}
-This mapping allows us to represent the publisher column as an integer
+This mapping allows us to represent the page column as an integer
array where the array indices correspond to the rows of the original
-data set. For the publisher column, we can represent the unique
-publishers as follows:
+data set. For the page column, we can represent the unique
+pages as follows:
\begin{verbatim}
[0, 0, 1, 1]
\end{verbatim}
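A minimal sketch of this dictionary encoding, assuming nothing about Druid's actual column implementation:

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative dictionary encoder for a string column.
class DictionaryEncodedColumn {
    private final Map<String, Integer> dictionary = new LinkedHashMap<>();
    private final List<Integer> rows = new ArrayList<>();

    void add(String value) {
        Integer id = dictionary.get(value);
        if (id == null) {
            id = dictionary.size();   // first occurrence gets the next integer id
            dictionary.put(value, id);
        }
        rows.add(id);
    }

    List<Integer> encodedRows() {
        return rows;                  // e.g. [0, 0, 1, 1] for the four example rows above
    }
}
```

Feeding the four example rows (Justin Bieber, Justin Bieber, Ke$ha, Ke$ha) through this encoder produces the dictionary {Justin Bieber -> 0, Ke$ha -> 1} and the integer array [0, 0, 1, 1].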
@@ -534,39 +549,42 @@ extremely common in column-stores. Druid uses the LZF \cite{liblzf2013} compress
algorithm.
Similar compression methods can be applied to numeric
-columns. For example, the clicks and revenue columns in
+columns. For example, the characters added and characters removed columns in
Table~\ref{tab:sample_data} can also be expressed as individual
arrays.
\begin{verbatim}
-Clicks -> [25, 42, 17, 170]
-Revenue -> [15.70, 29.18, 17.31, 34.01]
+Characters Added -> [1800, 2912, 1953, 3194]
+Characters Removed -> [25, 42, 17, 170]
\end{verbatim}
-In this case we compress the raw values as opposed to their dictionary
+In this case, we compress the raw values as opposed to their dictionary
representations.
-\subsection{Filters}
-To support arbitrary filter sets, Druid creates additional lookup
-indices for string columns. These lookup indices are compressed and
-Druid operates over the indices in their compressed form. Filters can
-be expressed as Boolean equations of multiple lookup indices. Boolean
-operations of indices in their compressed form is both performance and
-space efficient.
+\subsection{Inverted Indices}
+In most real world data analytic workflows, queries are issued for aggregated
+results over some set of metrics where some set of dimension specifications is
+met. For example, "how many Wikipedia edits were done by users in San
+Francisco who are also male?" Such queries filter the data based on a Boolean
+expression of dimension values. In many real world data sets, string columns
+are typically dimension columns and metric columns are typically numeric
+columns. Druid creates additional lookup indices for string columns such that
+only those rows that pertain to a particular query filter are ever scanned.
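As a rough illustration of the idea, using an uncompressed java.util.BitSet in place of Druid's Concise bitmaps, a per-value lookup index and a simple OR filter over it might look like this:

```java
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

// Illustrative inverted index for a single string column: value -> bitmap of row ids.
class StringColumnIndex {
    private final Map<String, BitSet> valueToRows = new HashMap<>();

    void mark(String value, int rowId) {
        valueToRows.computeIfAbsent(value, v -> new BitSet()).set(rowId);
    }

    // Rows where the column equals either value: the union (OR) of the two bitmaps.
    BitSet matchesEither(String a, String b) {
        BitSet result = (BitSet) bitmapFor(a).clone();
        result.or(bitmapFor(b));
        return result;
    }

    private BitSet bitmapFor(String value) {
        return valueToRows.getOrDefault(value, new BitSet());
    }
}
```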
-Let us consider the publisher column in
-Table~\ref{tab:sample_data}. For each unique publisher in
+Let us consider the page column in
+Table~\ref{tab:sample_data}. For each unique page in
Table~\ref{tab:sample_data}, we can form some representation
-indicating which table rows a particular publisher is seen. We can
+indicating in which table rows a particular page is seen. We can
store this information in a binary array where the array indices
-represent our rows. If a particular publisher is seen in a certain
+represent our rows. If a particular page is seen in a certain
row, that array index is marked as \texttt{1}. For example:
\begin{verbatim}
-bieberfever.com -> rows [0, 1] -> [1][1][0][0]
-ultratrimfast.com -> rows [2, 3] -> [0][0][1][1]
+Justin Bieber -> rows [0, 1] -> [1][1][0][0]
+Ke$ha -> rows [2, 3] -> [0][0][1][1]
\end{verbatim}
-\texttt{\href{http://bieberfever.com}{bieberfever.com}} is seen in rows \texttt{0} and \texttt{1}. This mapping of column values
+\texttt{Justin Bieber} is seen in rows \texttt{0} and \texttt{1}. This mapping of column values
to row indices forms an inverted index \cite{tomasic1993performance}. To know which
-rows contain {\ttfamily bieberfever.com} or {\ttfamily ultratrimfast.com}, we can \texttt{OR} together
+rows contain {\ttfamily Justin Bieber} or {\ttfamily Ke\$ha}, we can \texttt{OR} together
the two arrays.
\begin{verbatim}
-[0][1][0][1] OR [1][0][1][0] = [1][1][1][1]
+[1][1][0][0] OR [0][0][1][1] = [1][1][1][1]
@@ -579,95 +597,136 @@ the two arrays.
\label{fig:concise_plot}
\end{figure}
-This approach of performing Boolean operations on large bitmap sets is
-commonly used in search engines. Bitmap compression algorithms are a
-well-defined area of research and often utilize run-length
-encoding. Popular algorithms include Byte-aligned Bitmap Code \cite{antoshenkov1995byte},
-Word-Aligned Hybrid (WAH) code \cite{wu2006optimizing}, and Partitioned Word-Aligned
-Hybrid (PWAH) compression \cite{van2011memory}. Druid opted to use the Concise
-algorithm \cite{colantonio2010concise} as it can outperform WAH by reducing the size of the
-compressed bitmaps by up to 50\%. Figure~\ref{fig:concise_plot} illustrates the number of
-bytes using Concise compression versus using an integer array. The
-results were generated on a cc2.8xlarge system with a single thread,
-2G heap, 512m young gen, and a forced GC between each run. The data
-set is a single day’s worth of data collected from the Twitter garden
-hose \cite{twitter2013} data stream. The data set contains 2,272,295 rows and 12
-dimensions of varying cardinality. As an additional comparison, we
-also resorted the data set rows to maximize compression.
+This approach of performing Boolean operations on large bitmap sets is commonly
+used in search engines. Bitmap compression algorithms are a well-defined area
+of research and often utilize run-length encoding. Popular algorithms include
+Byte-aligned Bitmap Code \cite{antoshenkov1995byte}, Word-Aligned Hybrid (WAH)
+code \cite{wu2006optimizing}, and Partitioned Word-Aligned Hybrid (PWAH)
+compression \cite{van2011memory}. Druid opted to use the Concise algorithm
+\cite{colantonio2010concise} as it can outperform WAH by reducing the size of
+the compressed bitmaps by up to 50\%. Figure~\ref{fig:concise_plot}
+illustrates the number of bytes using Concise compression versus using an
+integer array. The results were generated on a cc2.8xlarge system with a single
+thread, 2G heap, 512m young gen, and a forced GC between each run. The data set
+is a single day’s worth of data collected from the Twitter garden hose
+\cite{twitter2013} data stream. The data set contains 2,272,295 rows and 12
+dimensions of varying cardinality. As an additional comparison, we also
+resorted the data set rows to maximize compression.
-In the unsorted case, the total Concise size was
-53,451,144 bytes and the total integer array size was 127,248,520 bytes. Overall,
-Concise compressed sets are about 42\% smaller than integer arrays.
-In the sorted case, the total Concise compressed size was 43,832,884
-bytes and the total integer array size was 127,248,520 bytes. What is
-interesting to note is that after sorting, global compression only
-increased minimally. The total Concise set size to total integer array
-size is 34\%. It is also interesting to note that as the cardinality
-of a dimension approaches the total number of rows in a data set,
-integer arrays require less space than Concise sets and become a better alternative.
+In the unsorted case, the total Concise size was 53,451,144 bytes and the total
+integer array size was 127,248,520 bytes. Overall, Concise compressed sets are
+about 42\% of the size of the integer arrays. In the sorted case, the total
+Concise compressed size was 43,832,884 bytes and the total integer array size
+was 127,248,520 bytes. It is interesting to note that after sorting, global
+compression only improved minimally: the ratio of total Concise set size to
+total integer array size dropped to about 34\%. It is also interesting to note
+that as the cardinality of a dimension approaches the total number of rows in
+a data set, integer arrays require less space than Concise sets and become a
+better alternative.
\subsection{Storage Engine}
-Druid’s persistence components allows for different storage engines to
-be plugged in, similar to Dynamo \cite{decandia2007dynamo}. These storage engines may store
-data in in-memory structures such as the JVM heap or in memory-mapped
+Druid’s persistence components allow for different storage engines to be
+plugged in, similar to Dynamo \cite{decandia2007dynamo}. These storage engines
+may store data in in-memory structures such as the JVM heap or in memory-mapped
structures. The ability to swap storage engines allows for Druid to be
-configured depending on a particular application’s specifications. An
-in-memory storage engine may be operationally more expensive than a
-memory-mapped storage engine but could be a better alternative if
-performance is critical. At Metamarkets, we commonly use a
-memory-mapped storage engine.
+configured depending on a particular application’s specifications. An in-memory
+storage engine may be operationally more expensive than a memory-mapped storage
+engine but could be a better alternative if performance is critical. By
+default, a memory-mapped storage engine is used.
-\section{Robustness}
-\label{sec:robustness}
-To achieve high system availability and data durability, Druid employs
-several fault recovery techniques. Druid has no single point of
-failure.
+Druid relies on the operating system to page segments in and out of memory.
+Given that segments can only be scanned if they are loaded in memory, a
+memory-mapped storage engine allows recently used segments to remain in memory
+whereas segments that are never queried are paged out. The main drawback of
+the memory-mapped storage engine arises when a query requires more segments to
+be paged into memory than a given node has capacity for. In this case, query
+performance suffers from the cost of paging segments in and out of memory.
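A minimal sketch of the memory-mapped approach using standard Java NIO (the segment file path is hypothetical); the operating system, not the process, decides which mapped pages stay resident:

```java
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

class SegmentMapper {
    // Maps a segment file read-only; pages are faulted in on first access and
    // may be paged out again under memory pressure.
    static MappedByteBuffer map(Path segmentFile) throws IOException {
        try (FileChannel channel = FileChannel.open(segmentFile, StandardOpenOption.READ)) {
            return channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
        }
    }
}
```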
-\subsection{Replication}
-Druid replicates historical segments on multiple hosts. The number of
-replicates in each tier of the historical compute cluster is fully
-configurable. Setups that require high levels of fault tolerance can
-be configured to have a high number of replicates. Replicates are
-assigned to historical nodes by coordination nodes using the same load
-distribution algorithm discussed in Section~\ref{sec:caching}. Broker nodes forward queries to the first node they find that contain a segment required for the query.
+\section{Query API}
+\label{sec:query-api}
+Druid has its own query language and accepts queries as POST requests. Broker,
+historical, and real-time nodes all share the same query API.
-Real-time segments follow a different replication model as real-time
-segments are mutable. Multiple real-time nodes can read from the same message
-bus and event stream if each node maintains a unique offset and consumer id, hence creating multiple copies
-of a real-time segment. This is conceptually different than multiple
-nodes reading from the same event stream and sharing the same offset and consumer id, doing so would create
-multiple segment partitions. If a real-time node fails and recovers, it can
-reload any indexes that were persisted to disk and read from the
-message bus from the point it last committed an offset.
+The body of the POST request is a JSON object containing key-value pairs
+specifying various query parameters. A typical query will contain the data
+source name, the granularity of the result data, the time range of interest, the
+type of request, and the metrics to aggregate over. The result will also be a
+JSON object containing the aggregated metrics over the time period.
-\subsection{Failure Detection}
-If a historical node completely fails and becomes unavailable, the
-ephemeral Zookeeper znodes it created are deleted. The master node
-will notice that certain segments are insufficiently replicated or
-missing altogether. Additional replicates will be created and
-redistributed throughout the cluster.
+Most query types will also support a filter set. A filter set is a Boolean
+expression of dimension name and value pairs. Any number and combination of
+dimensions and values may be specified. When a filter set is provided, only
+the subset of the data that pertains to the filter set will be scanned. The
+ability to handle complex nested filter sets is what enables Druid to drill
+into data at any depth.
-We are moving towards building out infrastructure to support
-programmatic creation of real-time nodes. In the near future, the
-master node or a similar service will notice if real-time segments are insufficiently
-replicated and automatically create additional real-time nodes as
-redundant backups.
+The exact query syntax depends on the query type and the information requested.
+A sample count query over a week of data is shown below:
+\begin{verbatim}
+{
+ "queryType" : "timeseries",
+ "dataSource" : "wikipedia",
+ "intervals" : "2013-01-01/2013-01-08",
+ "filter" : {
+ "type" : "selector",
+ "dimension" : "page",
+ "value" : "Ke$ha"
+ },
+ "granularity" : "day",
+ "aggregations" : [
+ {
+ "type" : "count",
+ "name" : "rows"
+ }
+ ]
+}
+\end{verbatim}
-Coordination and broker nodes always have redundant backup nodes in the event
-the primary fails. The backup nodes are usually idle until Zookeeper changes alert
-them to assume the responsibilities of their primary counterparts.
+The query shown above will return a count of the number of rows in the \emph{wikipedia} datasource
+from 2013-01-01 to 2013-01-08, filtered for only those rows where the value of the "page" dimension is
+equal to "Ke\$ha". The results will be bucketed by day and will be a JSON array of the following form:
+\begin{verbatim}
+[
+ {
+ "timestamp": "2012-01-01T00:00:00.000Z",
+ "result": {
+ "rows": 393298
+ }
+ },
+ {
+ "timestamp": "2012-01-02T00:00:00.000Z",
+ "result": {
+ "rows": 382932
+ }
+ },
+ ...
+ {
+ "timestamp": "2012-01-07T00:00:00.000Z",
+ "result": {
+ "rows": 1337
+ }
+ }
+]
+\end{verbatim}
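For illustration, the timeseries query above could be issued as a plain HTTP POST; the broker host, port, and endpoint path used here are assumptions and depend on the deployment:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class DruidQueryClient {
    public static void main(String[] args) throws Exception {
        String query = """
            {
              "queryType"   : "timeseries",
              "dataSource"  : "wikipedia",
              "intervals"   : "2013-01-01/2013-01-08",
              "filter"      : { "type": "selector", "dimension": "page", "value": "Ke$ha" },
              "granularity" : "day",
              "aggregations": [ { "type": "count", "name": "rows" } ]
            }""";

        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/druid/v2/"))   // assumed broker endpoint
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(query))
                .build();

        HttpResponse<String> response =
                HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());   // JSON array of per-day {"timestamp", "result"} objects
    }
}
```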
-\subsection{Adding and Removing Nodes}
-Starting and removing Druid nodes is done by starting and stopping Java processes. There is minimal
-operational overhead with adding nodes in batches. Scaling down the
-cluster is usually done one node at a time with some time lapse
-between shutdowns. This allows the master to have ample time to
-redistribute load and create additional replicates. Shutting down
-nodes in batches is not recommended as it may destroy all
-copies of a segment, which would lead to data loss.
+Druid supports many types of aggregations including double sums, long sums,
+minimums, maximums, and several others. Druid also supports complex aggregations
+such as cardinality estimation and approximate quantile estimation. The
+results of aggregations can be combined in mathematical expressions to form
+other aggregations. The query API is highly customizable and can be extended to
+filter and group results based on almost any arbitrary condition. It is beyond
+the scope of this paper to fully describe the query API but more information
+can be found
+online\footnote{\href{http://druid.io/docs/latest/Querying.html}{http://druid.io/docs/latest/Querying.html}}.
+We are also in the process of extending the Druid API to understand SQL.
\section{Performance Benchmarks}
\label{sec:benchmarks}
+To illustrate Druid's performance, we conducted a series of experiments that
+focused on measuring Druid's query and data ingestion capabilities.
+
+\subsection{Query Performance}
\begin{figure}
\centering
\includegraphics[width = 2.8in]{cluster_scan_rate}
@@ -675,21 +734,24 @@ copies of a segment, which would lead to data loss.
from 25 nodes.}
\label{fig:cluster_scan_rate}
\end{figure}
-To benchmark Druid performance, we created a large test cluster with
-6TB of uncompressed data, representing tens of billions of fact
-rows. The data set contained more than a dozen dimensions, with
-cardinalities ranging from the double digits to tens of millions. We computed
-four metrics for each row (counts, sums, and averages). The data was
-sharded first on timestamp then on dimension values, creating
-thousands of shards roughly 8 million fact rows apiece.
+To benchmark Druid query performance, we created a large test cluster with 6TB
+of uncompressed data, representing tens of billions of fact rows. The data set
+contained more than a dozen dimensions, with cardinalities ranging from the
+double digits to tens of millions. We computed four metrics for each row
+(counts, sums, and averages). The data was sharded first on timestamp and then
+on dimension values, creating thousands of shards of roughly 8 million fact
+rows apiece.
-The cluster used in the benchmark consisted of 100 historical compute
-nodes, each with 16 cores, 60GB of RAM, 10 GigE Ethernet, and 1TB of
-disk space. Collectively, the cluster comprised of 1600 cores, 6TB or
-RAM, sufficiently fast Ethernet and more than enough disk space.
+The cluster used in the benchmark consisted of 100 historical nodes, each with
+16 cores, 60GB of RAM, 10 GigE Ethernet, and 1TB of disk space. Collectively,
+the cluster comprised 1600 cores, 6TB of RAM, sufficiently fast Ethernet, and
+more than enough disk space.
-SQL statements are included in Table~\ref{tab:sql_queries} to describe the
-purpose of each of the queries. Please note:
+SQL statements are included in Table~\ref{tab:sql_queries}. These queries are
+meant to represent some common queries that are made against Druid for typical data
+analysis workflows. Although Druid has its own query language, we chose to
+translate the queries into SQL to better describe what the queries are doing.
+Please note:
\begin{itemize}
\item The timestamp range of the queries encompassed all data.
\item Each machine was a 16-core machine with 60GB RAM and 1TB of local
@@ -724,18 +786,16 @@ purpose of each of the queries. Please note:
Figure~\ref{fig:cluster_scan_rate} shows the cluster scan rate and
Figure~\ref{fig:core_scan_rate} shows the core scan rate. In
-Figure~\ref{fig:cluster_scan_rate} we also include projected linear
-scaling based on the results of the 25 core cluster. In particular,
-we observe diminishing marginal returns to performance in the size of
-the cluster. Under linear scaling, SQL query 1 would have achieved a
-speed of 37 billion rows per second on our 75 node cluster. In fact,
-the speed was 26 billion rows per second. However, queries 2-6 maintain
-a near-linear speedup up to 50 nodes: the core scan rates in
-Figure~\ref{fig:core_scan_rate} remain nearly constant.
-The increase in speed of a parallel
-computing system is often limited by the time needed for the
-sequential operations of the system, in accordance with Amdahl's law
-\cite{amdahl1967validity}.
+Figure~\ref{fig:cluster_scan_rate} we also include projected linear scaling
+based on the results of the 25 node cluster. In particular, we observe
+diminishing marginal returns to performance in the size of the cluster. Under
+linear scaling, the first SQL count query (query 1) would have achieved a speed
+of 37 billion rows per second on our 75 node cluster. In fact, the speed was
+26 billion rows per second. However, queries 2-6 maintain a near-linear
+speedup up to 50 nodes: the core scan rates in Figure~\ref{fig:core_scan_rate}
+remain nearly constant. The increase in speed of a parallel computing system
+is often limited by the time needed for the sequential operations of the
+system, in accordance with Amdahl's law \cite{amdahl1967validity}.
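In its standard form, Amdahl's law gives the maximum speedup on $N$ nodes as $S(N) = 1/((1-p) + p/N)$, where $p$ is the fraction of work that can be parallelized; as $N$ grows, the speedup is bounded by $1/(1-p)$, which is consistent with the sub-linear scaling observed here.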
The first query listed in Table~\ref{tab:sql_queries} is a simple
count, achieving scan rates of 33M rows/second/core. We believe
@@ -752,36 +812,87 @@ because of the column-oriented storage format Druid employs. For the
the ``where'' clause. As we add metrics, it has to also load those metric
values and scan over them, increasing the amount of memory scanned.
+\subsection{Data Ingestion Performance}
+To measure Druid's data ingestion latency, we spun up a single real-time node
+with the following configurations:
+\begin{itemize}
+\item JVM arguments: -Xmx2g -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+HeapDumpOnOutOfMemoryError
+\item CPU: 2.3 GHz Intel Core i7
+\end{itemize}
+
+Druid's data ingestion latency is heavily dependent on the complexity of the data set
+being ingested. The data complexity is determined by the number of dimensions
+in each event, the number of metrics in each event, and the types of
+aggregations we want to perform as we roll up data to a certain time
+granularity. With the most basic data set (one that only has a timestamp
+column), our setup can ingest data at a rate of 800k events/sec/node, which is
+really just a measurement of how fast we can deserialize events. Real world
+data sets are never this simple. To simulate more real-world ingestion rates,
+we created a data set with 5 dimensions and a single metric. 4 out of the 5
+dimensions have a cardinality less than 100, and we varied the cardinality of
+the final dimension. The results of varying the cardinality of a dimension are
+shown in Figure~\ref{fig:cardinality_vs_throughput}.
+\begin{figure}
+\centering
+\includegraphics[width = 2.8in]{cardinality_vs_throughput}
+\caption{Varying the cardinality of a single dimension, we can see the impact on throughput.}
+\label{fig:cardinality_vs_throughput}
+\end{figure}
+
+In Figure~\ref{fig:throughput_vs_num_dims}, we instead vary the number of
+dimensions in our data set. Each dimension has a cardinality less than 100. We
+can see a similar decline in ingestion throughput as the number of dimensions
+increases.
+\begin{figure}
+\centering
+\includegraphics[width = 2.8in]{throughput_vs_num_dims}
+\caption{Increasing the number of dimensions of our data set, we see a similar decline in throughput.}
+\label{fig:throughput_vs_num_dims}
+\end{figure}
+
+Finally, keeping our number of dimensions constant at 5, with each dimension
+having a cardinality in the 0-100 range, we can see a similar decline in
+throughput when we increase the number of metrics in the data set. For most
+real world data sets, the number of metrics tends to be less than the number of
+dimensions. Hence, we can see that adding a few new metrics does not
+substantially impact the ingestion latency.
+\begin{figure}
+\centering
+\includegraphics[width = 2.8in]{ingestion_latency_vs_num_metrics}
+\caption{Adding new metrics to a data set decreases ingestion throughput; however, in most real world data sets, the number of metrics tends to be low and the impact of adding them is not overly substantial.}
+\label{fig:ingestion_latency_vs_num_metrics}
+\end{figure}
+
\section{Related Work}
\label{sec:related}
-Cattell \cite{cattell2011scalable} maintains a great summary about existing Scalable SQL and
-NoSQL data stores. Druid
-feature-wise sits somewhere between Google’s Dremel \cite{melnik2010dremel} and PowerDrill
-\cite{hall2012processing}. Druid has most of the features implemented in Dremel (Dremel
-handles arbitrary nested data structures while Druid only allows for a
-single level of array-based nesting) and many of the interesting
-compression algorithms mentioned in PowerDrill.
+Cattell \cite{cattell2011scalable} maintains a great summary of existing
+scalable SQL and NoSQL data stores. Hu \cite{hu2011stream} contributed another
+great summary of streaming databases. Druid feature-wise sits somewhere
+between Google’s Dremel \cite{melnik2010dremel} and PowerDrill
+\cite{hall2012processing}. Druid has most of the features implemented in Dremel
+(Dremel handles arbitrary nested data structures while Druid only allows for a
+single level of array-based nesting) and many of the interesting compression
+algorithms mentioned in PowerDrill.
-Although Druid builds on many of the same principles as other
-distributed columnar data stores \cite{fink2012distributed}, most existing data
-stores are designed to be key-value stores \cite{lerner2010redis}, or
-document/extensible record stores \cite{stonebraker2005c}. Such data stores are great
-solutions for traditional data warehouse needs and general
-back-office/reporting usage. Typically, analysts will query these data
-stores and build reports from the results. In-memory databases such as
-SAP’s HANA \cite{farber2012sap} and VoltDB \cite{voltdb2010voltdb} are examples of other data stores that
-are highly suited for traditional data warehousing needs. Druid is a
-front-office system designed such that user-facing dashboards can be
-built on top of it. Similar to \cite{paraccel2013}, Druid has analytical features
-built in. The main features Druid offers over traditional data
-warehousing solutions are real-time data ingestion, interactive
-queries and interactive query latencies. In terms of real-time
-ingestion and processing of data, Trident/Storm \cite{marz2013storm} and Streaming
-Spark \cite{zaharia2012discretized} are other popular real-time computation systems, although
-they lack the data storage capabilities of Druid. Spark/Shark \cite{engle2012shark} are
-also doing similar work in the area of fast data analysis on large
-scale data sets. Cloudera Impala \cite{cloudera2013} is another system focused on
-optimizing querying performance, but more so in Hadoop environments.
+Although Druid builds on many of the same principles as other distributed
+columnar data stores \cite{fink2012distributed}, many of these data stores are
+designed to be more generic key-value stores \cite{stonebraker2005c} and do not
+support computation directly in the storage layer. These data stores remain
+popular solutions in the traditional data warehousing space. Other popular
+systems designed for some of the same use cases that Druid is designed to solve
+include in-memory databases such as SAP’s HANA \cite{farber2012sap} and VoltDB
+\cite{voltdb2010voltdb}. Druid is a front-office system designed such that
+user-facing dashboards can be built on top of it. Similar to
+\cite{paraccel2013}, Druid has analytical features built in. The main features
+Druid offers over traditional data warehousing solutions are real-time data
+ingestion, interactive queries and interactive query latencies. In terms of
+real-time ingestion and processing of data, Trident/Storm \cite{marz2013storm}
+and Streaming Spark \cite{zaharia2012discretized} are other popular real-time
+computation systems, although they lack the data storage capabilities of Druid.
+Spark/Shark \cite{engle2012shark} are also doing similar work in the area of
+fast data analysis on large scale data sets. Cloudera Impala
+\cite{cloudera2013} is another system focused on optimizing querying
+performance, but more so in Hadoop environments.
Druid leverages a unique combination of algorithms in its
architecture. Although we believe no other data store has the same set
@@ -791,25 +902,23 @@ stores \cite{macnicol2004sybase}.
\section{Conclusions}
\label{sec:conclusions}
-In this paper, we presented Druid, a distributed, column-oriented,
-real-time analytical data store. Druid is a highly customizable
-solution that is optimized for fast query latencies. Druid ingests
-data in real-time and is fault-tolerant. We discussed the performance
-of Druid on billion row data sets. We summarized key Druid architecture
-aspects such as the storage format, query language and general
-execution. In the future, we plan to cover more in depth the different
-algorithms we’ve developed for Druid and how other systems may plug
-into Druid to achieve powerful use cases.
+In this paper, we presented Druid, a distributed, column-oriented, real-time
+analytical data store. Druid is designed to power high performance applications
+and is optimized for low query latencies. Druid ingests data in real-time and
+is fault-tolerant. We discussed Druid performance benchmarks on billion row
+data sets. We summarized key architecture aspects such as the storage format,
+query language, and general execution. In the future, we plan to cover the
+different algorithms we’ve developed for Druid and how other systems may plug
+into Druid in greater detail.
\balance
\section{Acknowledgements}
\label{sec:acknowledgements}
-We want to thank Steve Harris for his feedback on improving this paper, Adam Smith for giving us the chance to write this paper, and Katherine Chu for
-helping to create all the images in this paper. Druid could not have been built without the help of many great
-engineers at Metamarkets and in the community. We want to thank Danny Yuan, Jae Hyeon Bae, Paul Baclace, Dave
-Nielsen, and Dhruv Parthasarathy for their
-contributions to Druid.
+Druid could not have been built without the help of many great engineers at
+Metamarkets and in the community. We want to thank everyone that has
+contributed to the Druid codebase for their invaluable support. In particular
+we want to thank Steve Harris for providing feedback on improving this paper.
% The following two commands are all you need in the
% initial runs of your .tex file to
diff --git a/publications/whitepaper/figures/caching.png b/publications/whitepaper/figures/caching.png
new file mode 100644
index 00000000000..e3ee4dd94df
Binary files /dev/null and b/publications/whitepaper/figures/caching.png differ
diff --git a/publications/whitepaper/figures/cardinality_vs_throughput.png b/publications/whitepaper/figures/cardinality_vs_throughput.png
new file mode 100644
index 00000000000..6ef18cbfd19
Binary files /dev/null and b/publications/whitepaper/figures/cardinality_vs_throughput.png differ
diff --git a/publications/whitepaper/figures/cluster.png b/publications/whitepaper/figures/cluster.png
new file mode 100644
index 00000000000..9dba8513538
Binary files /dev/null and b/publications/whitepaper/figures/cluster.png differ
diff --git a/publications/whitepaper/figures/historical_download.png b/publications/whitepaper/figures/historical_download.png
new file mode 100644
index 00000000000..416d44bfc6c
Binary files /dev/null and b/publications/whitepaper/figures/historical_download.png differ
diff --git a/publications/whitepaper/figures/ingestion_latency_vs_num_metrics.png b/publications/whitepaper/figures/ingestion_latency_vs_num_metrics.png
new file mode 100644
index 00000000000..0ffffa0a68b
Binary files /dev/null and b/publications/whitepaper/figures/ingestion_latency_vs_num_metrics.png differ
diff --git a/publications/whitepaper/figures/realtime_flow.png b/publications/whitepaper/figures/realtime_flow.png
new file mode 100644
index 00000000000..ac9a241ab54
Binary files /dev/null and b/publications/whitepaper/figures/realtime_flow.png differ
diff --git a/publications/whitepaper/figures/realtime_pipeline.png b/publications/whitepaper/figures/realtime_pipeline.png
new file mode 100644
index 00000000000..f338d239117
Binary files /dev/null and b/publications/whitepaper/figures/realtime_pipeline.png differ
diff --git a/publications/whitepaper/figures/realtime_timeline.png b/publications/whitepaper/figures/realtime_timeline.png
new file mode 100644
index 00000000000..76806bc9aa2
Binary files /dev/null and b/publications/whitepaper/figures/realtime_timeline.png differ
diff --git a/publications/whitepaper/figures/throughput_vs_num_dims.png b/publications/whitepaper/figures/throughput_vs_num_dims.png
new file mode 100644
index 00000000000..60f4842a3cc
Binary files /dev/null and b/publications/whitepaper/figures/throughput_vs_num_dims.png differ
diff --git a/rabbitmq/pom.xml b/rabbitmq/pom.xml
new file mode 100644
index 00000000000..8fc0ffe682f
--- /dev/null
+++ b/rabbitmq/pom.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>io.druid.extensions</groupId>
+    <artifactId>druid-rabbitmq</artifactId>
+    <name>druid-rabbitmq</name>
+    <description>druid-rabbitmq</description>
+
+    <parent>
+        <groupId>io.druid</groupId>
+        <artifactId>druid</artifactId>
+        <version>0.6.27-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>io.druid</groupId>
+            <artifactId>druid-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.rabbitmq</groupId>
+            <artifactId>amqp-client</artifactId>
+            <version>3.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>net.jodah</groupId>
+            <artifactId>lyra</artifactId>
+            <version>0.3.1</version>
+        </dependency>
+
+        <!-- Tests -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/JacksonifiedConnectionFactory.java b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/JacksonifiedConnectionFactory.java
similarity index 98%
rename from server/src/main/java/io/druid/segment/realtime/firehose/JacksonifiedConnectionFactory.java
rename to rabbitmq/src/main/java/io/druid/firehose/rabbitmq/JacksonifiedConnectionFactory.java
index 03f35cf48f7..132fe3b6179 100644
--- a/server/src/main/java/io/druid/segment/realtime/firehose/JacksonifiedConnectionFactory.java
+++ b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/JacksonifiedConnectionFactory.java
@@ -17,7 +17,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-package io.druid.segment.realtime.firehose;
+package io.druid.firehose.rabbitmq;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.rabbitmq.client.ConnectionFactory;
diff --git a/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQDruidModule.java b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQDruidModule.java
new file mode 100644
index 00000000000..548cbcc1d1a
--- /dev/null
+++ b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQDruidModule.java
@@ -0,0 +1,50 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013 Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+package io.druid.firehose.rabbitmq;
+
+import com.fasterxml.jackson.databind.Module;
+import com.fasterxml.jackson.databind.jsontype.NamedType;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import com.google.common.collect.ImmutableList;
+import com.google.inject.Binder;
+import io.druid.initialization.DruidModule;
+
+import java.util.List;
+
+/**
+ */
+public class RabbitMQDruidModule implements DruidModule
+{
+ @Override
+ public List<? extends Module> getJacksonModules()
+ {
+ return ImmutableList.of(
+ new SimpleModule("RabbitMQFirehoseModule")
+ .registerSubtypes(
+ new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq")
+ )
+ );
+ }
+
+ @Override
+ public void configure(Binder binder)
+ {
+
+ }
+}
diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseConfig.java b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseConfig.java
similarity index 73%
rename from server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseConfig.java
rename to rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseConfig.java
index 42e10dd601b..7bae291c8a3 100644
--- a/server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseConfig.java
+++ b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseConfig.java
@@ -17,7 +17,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-package io.druid.segment.realtime.firehose;
+package io.druid.firehose.rabbitmq;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -33,6 +33,11 @@ public class RabbitMQFirehoseConfig
private boolean exclusive = false;
private boolean autoDelete = false;
+ // Lyra (auto reconnect) properties
+ private int maxRetries = 100;
+ private int retryIntervalSeconds = 2;
+ private long maxDurationSeconds = 5 * 60;
+
@JsonProperty
public String getQueue()
{
@@ -98,4 +103,31 @@ public class RabbitMQFirehoseConfig
{
this.autoDelete = autoDelete;
}
+
+ @JsonProperty
+ public int getMaxRetries() {
+ return maxRetries;
+ }
+
+ public void setMaxRetries(int maxRetries) {
+ this.maxRetries = maxRetries;
+ }
+
+ @JsonProperty
+ public int getRetryIntervalSeconds() {
+ return retryIntervalSeconds;
+ }
+
+ public void setRetryIntervalSeconds(int retryIntervalSeconds) {
+ this.retryIntervalSeconds = retryIntervalSeconds;
+ }
+
+ @JsonProperty
+ public long getMaxDurationSeconds() {
+ return maxDurationSeconds;
+ }
+
+ public void setMaxDurationSeconds(int maxDurationSeconds) {
+ this.maxDurationSeconds = maxDurationSeconds;
+ }
}
diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseFactory.java b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseFactory.java
similarity index 77%
rename from server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseFactory.java
rename to rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseFactory.java
index aa0270df15d..df4d1baeda1 100644
--- a/server/src/main/java/io/druid/segment/realtime/firehose/RabbitMQFirehoseFactory.java
+++ b/rabbitmq/src/main/java/io/druid/firehose/rabbitmq/RabbitMQFirehoseFactory.java
@@ -17,11 +17,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-package io.druid.segment.realtime.firehose;
+package io.druid.firehose.rabbitmq;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.metamx.common.logger.Logger;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
@@ -33,6 +32,11 @@ import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.StringInputRowParser;
+import net.jodah.lyra.ConnectionOptions;
+import net.jodah.lyra.Connections;
+import net.jodah.lyra.config.Config;
+import net.jodah.lyra.retry.RetryPolicy;
+import net.jodah.lyra.util.Duration;
import java.io.IOException;
@@ -51,21 +55,25 @@ import java.io.IOException;
* "firehose" : {
* "type" : "rabbitmq",
* "connection" : {
- * "host": "localhost", # The hostname of the RabbitMQ broker to connect to. Default: 'localhost'
- * "port": "5672", # The port number to connect to on the RabbitMQ broker. Default: '5672'
- * "username": "test-dude", # The username to use to connect to RabbitMQ. Default: 'guest'
- * "password": "test-word", # The password to use to connect to RabbitMQ. Default: 'guest'
- * "virtualHost": "test-vhost", # The virtual host to connect to. Default: '/'
- * "uri": "amqp://mqserver:1234/vhost", # The URI string to use to connect to RabbitMQ. No default and not needed
- * },
- * "config" : {
- * "exchange": "test-exchange", # The exchange to connect to. No default
- * "queue" : "druidtest", # The queue to connect to or create. No default
- * "routingKey": "#", # The routing key to use to bind the queue to the exchange. No default
- * "durable": "true", # Whether the queue should be durable. Default: 'false'
- * "exclusive": "false", # Whether the queue should be exclusive. Default: 'false'
- * "autoDelete": "false" # Whether the queue should auto-delete on disconnect. Default: 'false'
- * },
+ * "host": "localhost", # The hostname of the RabbitMQ broker to connect to. Default: 'localhost'
+ * "port": "5672", # The port number to connect to on the RabbitMQ broker. Default: '5672'
+ * "username": "test-dude", # The username to use to connect to RabbitMQ. Default: 'guest'
+ * "password": "test-word", # The password to use to connect to RabbitMQ. Default: 'guest'
+ * "virtualHost": "test-vhost", # The virtual host to connect to. Default: '/'
+ * "uri": "amqp://mqserver:1234/vhost", # The URI string to use to connect to RabbitMQ. No default and not needed
+ * },
+ * "config" : {
+ * "exchange": "test-exchange", # The exchange to connect to. No default
+ * "queue" : "druidtest", # The queue to connect to or create. No default
+ * "routingKey": "#", # The routing key to use to bind the queue to the exchange. No default
+ * "durable": "true", # Whether the queue should be durable. Default: 'false'
+ * "exclusive": "false", # Whether the queue should be exclusive. Default: 'false'
+ * "autoDelete": "false", # Whether the queue should auto-delete on disconnect. Default: 'false'
+ *
+ * "maxRetries": "10", # The max number of reconnection retry attempts
+ * "retryIntervalSeconds": "1", # The reconnection interval
+ * "maxDurationSeconds": "300" # The max duration of trying to reconnect
+ * },
* "parser" : {
* "timestampSpec" : { "column" : "utcdt", "format" : "iso" },
* "data" : { "format" : "json" },
@@ -113,6 +121,13 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
@Override
public Firehose connect() throws IOException
{
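+ // Lyra wraps the RabbitMQ connection and transparently retries and reconnects
+ // using the maxRetries, retryIntervalSeconds, and maxDurationSeconds values
+ // from the firehose config.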
+ ConnectionOptions lyraOptions = new ConnectionOptions(this.connectionFactory);
+ Config lyraConfig = new Config()
+ .withRecoveryPolicy(new RetryPolicy()
+ .withMaxRetries(config.getMaxRetries())
+ .withRetryInterval(Duration.seconds(config.getRetryIntervalSeconds()))
+ .withMaxDuration(Duration.seconds(config.getMaxDurationSeconds())));
+
String queue = config.getQueue();
String exchange = config.getExchange();
String routingKey = config.getRoutingKey();
@@ -121,13 +136,7 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
boolean exclusive = config.isExclusive();
boolean autoDelete = config.isAutoDelete();
- final Connection connection;
- try {
- connection = connectionFactory.newConnection();
- } catch (Exception e) {
- log.error("Unable to find a RabbitMQ broker. Are you sure you have one running?");
- throw Throwables.propagate(e);
- }
+ final Connection connection = Connections.create(lyraOptions, lyraConfig);
connection.addShutdownListener(new ShutdownListener()
{
@@ -135,7 +144,6 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
public void shutdownCompleted(ShutdownSignalException cause)
{
log.warn(cause, "Connection closed!");
- //FUTURE: we could try to re-establish the connection here. Not done in this version though.
}
});
@@ -148,7 +156,6 @@ public class RabbitMQFirehoseFactory implements FirehoseFactory
public void shutdownCompleted(ShutdownSignalException cause)
{
log.warn(cause, "Channel closed!");
- //FUTURE: we could try to re-establish the connection here. Not done in this version though.
}
});
diff --git a/rabbitmq/src/main/resources/META-INF/services/io.druid.initialization.DruidModule b/rabbitmq/src/main/resources/META-INF/services/io.druid.initialization.DruidModule
new file mode 100644
index 00000000000..9ba3917f48a
--- /dev/null
+++ b/rabbitmq/src/main/resources/META-INF/services/io.druid.initialization.DruidModule
@@ -0,0 +1 @@
+io.druid.firehose.rabbitmq.RabbitMQDruidModule
\ No newline at end of file
diff --git a/examples/src/main/java/io/druid/examples/rabbitmq/RabbitMQProducerMain.java b/rabbitmq/src/test/java/io/druid/examples/rabbitmq/RabbitMQProducerMain.java
similarity index 100%
rename from examples/src/main/java/io/druid/examples/rabbitmq/RabbitMQProducerMain.java
rename to rabbitmq/src/test/java/io/druid/examples/rabbitmq/RabbitMQProducerMain.java
diff --git a/s3-extensions/pom.xml b/s3-extensions/pom.xml
index fb558506dbb..ccc97398ad2 100644
--- a/s3-extensions/pom.xml
+++ b/s3-extensions/pom.xml
@@ -28,7 +28,7 @@
     <groupId>io.druid</groupId>
     <artifactId>druid</artifactId>
-    <version>0.6.25-SNAPSHOT</version>
+    <version>0.6.27-SNAPSHOT</version>
diff --git a/server/pom.xml b/server/pom.xml
index 434da7a5ea2..a85223056cc 100644
--- a/server/pom.xml
+++ b/server/pom.xml
@@ -28,7 +28,7 @@
     <groupId>io.druid</groupId>
     <artifactId>druid</artifactId>
-    <version>0.6.25-SNAPSHOT</version>
+    <version>0.6.27-SNAPSHOT</version>
@@ -183,10 +183,6 @@
org.eclipse.jetty
jetty-servlets
-        <dependency>
-            <groupId>com.rabbitmq</groupId>
-            <artifactId>amqp-client</artifactId>
-        </dependency>
com.ircclouds.irc
irc-api
diff --git a/server/src/main/java/io/druid/guice/FirehoseModule.java b/server/src/main/java/io/druid/guice/FirehoseModule.java
index 8a7480fd22c..aacc217e208 100644
--- a/server/src/main/java/io/druid/guice/FirehoseModule.java
+++ b/server/src/main/java/io/druid/guice/FirehoseModule.java
@@ -28,7 +28,6 @@ import io.druid.initialization.DruidModule;
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
-import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import java.util.Arrays;
@@ -49,7 +48,6 @@ public class FirehoseModule implements DruidModule
return Arrays.asList(
new SimpleModule("FirehoseModule")
.registerSubtypes(
- new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
new NamedType(ClippedFirehoseFactory.class, "clipped"),
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
new NamedType(IrcFirehoseFactory.class, "irc"),
diff --git a/server/src/main/java/io/druid/server/QueryResource.java b/server/src/main/java/io/druid/server/QueryResource.java
index cbd388a9304..3ee7599e1c8 100644
--- a/server/src/main/java/io/druid/server/QueryResource.java
+++ b/server/src/main/java/io/druid/server/QueryResource.java
@@ -87,6 +87,8 @@ public class QueryResource
@Context HttpServletResponse resp
) throws ServletException, IOException
{
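+ // Record the start of the request so the emitted query time measures this
+ // request rather than the age of the servlet session.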
+ final long start = System.currentTimeMillis();
+
Query query = null;
byte[] requestQuery = null;
@@ -118,7 +120,7 @@ public class QueryResource
out = resp.getOutputStream();
jsonWriter.writeValue(out, results);
- long requestTime = System.currentTimeMillis() - req.getSession().getCreationTime();
+ long requestTime = System.currentTimeMillis() - start;
emitter.emit(
new ServiceMetricEvent.Builder()
diff --git a/server/src/test/java/io/druid/client/client/BatchServerInventoryViewTest.java b/server/src/test/java/io/druid/client/client/BatchServerInventoryViewTest.java
index 38864822eb8..a777222efe1 100644
--- a/server/src/test/java/io/druid/client/client/BatchServerInventoryViewTest.java
+++ b/server/src/test/java/io/druid/client/client/BatchServerInventoryViewTest.java
@@ -198,7 +198,8 @@ public class BatchServerInventoryViewTest
private void waitForSync() throws Exception
{
Stopwatch stopwatch = new Stopwatch().start();
- while (Iterables.get(batchServerInventoryView.getInventory(), 0).getSegments().size() != testSegments.size()) {
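+ // Guard against an empty inventory; Iterables.get(..., 0) would otherwise
+ // throw before the first server announces itself.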
+ while (!Iterables.isEmpty(batchServerInventoryView.getInventory())
+ && Iterables.get(batchServerInventoryView.getInventory(), 0).getSegments().size() != testSegments.size()) {
Thread.sleep(500);
if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > 5000) {
throw new ISE("BatchServerInventoryView is not updating");
diff --git a/services/pom.xml b/services/pom.xml
index e1ca5244092..0dcad5375c3 100644
--- a/services/pom.xml
+++ b/services/pom.xml
@@ -27,7 +27,7 @@
     <groupId>io.druid</groupId>
     <artifactId>druid</artifactId>
-    <version>0.6.25-SNAPSHOT</version>
+    <version>0.6.27-SNAPSHOT</version>
diff --git a/services/src/main/java/io/druid/cli/CliBroker.java b/services/src/main/java/io/druid/cli/CliBroker.java
index f94635100c4..17ab46de12c 100644
--- a/services/src/main/java/io/druid/cli/CliBroker.java
+++ b/services/src/main/java/io/druid/cli/CliBroker.java
@@ -53,7 +53,7 @@ import java.util.List;
*/
@Command(
name = "broker",
- description = "Runs a broker node, see http://druid.io/docs/0.6.24/Broker.html for a description"
+ description = "Runs a broker node, see http://druid.io/docs/0.6.26/Broker.html for a description"
)
public class CliBroker extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/CliCoordinator.java b/services/src/main/java/io/druid/cli/CliCoordinator.java
index 74686c57806..937d11a88d7 100644
--- a/services/src/main/java/io/druid/cli/CliCoordinator.java
+++ b/services/src/main/java/io/druid/cli/CliCoordinator.java
@@ -63,7 +63,7 @@ import java.util.List;
*/
@Command(
name = "coordinator",
- description = "Runs the Coordinator, see http://druid.io/docs/0.6.24/Coordinator.html for a description."
+ description = "Runs the Coordinator, see http://druid.io/docs/0.6.26/Coordinator.html for a description."
)
public class CliCoordinator extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
index 9a8f1fc9bd3..580bdd8f3da 100644
--- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
+++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
@@ -41,7 +41,7 @@ import java.util.List;
*/
@Command(
name = "hadoop",
- description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.24/Batch-ingestion.html for a description."
+ description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.26/Batch-ingestion.html for a description."
)
public class CliHadoopIndexer implements Runnable
{
diff --git a/services/src/main/java/io/druid/cli/CliHistorical.java b/services/src/main/java/io/druid/cli/CliHistorical.java
index 24ad591118f..15abef7cae5 100644
--- a/services/src/main/java/io/druid/cli/CliHistorical.java
+++ b/services/src/main/java/io/druid/cli/CliHistorical.java
@@ -42,7 +42,7 @@ import java.util.List;
*/
@Command(
name = "historical",
- description = "Runs a Historical node, see http://druid.io/docs/0.6.24/Historical.html for a description"
+ description = "Runs a Historical node, see http://druid.io/docs/0.6.26/Historical.html for a description"
)
public class CliHistorical extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/CliOverlord.java b/services/src/main/java/io/druid/cli/CliOverlord.java
index 6c0c71a2893..c0a2d14b54c 100644
--- a/services/src/main/java/io/druid/cli/CliOverlord.java
+++ b/services/src/main/java/io/druid/cli/CliOverlord.java
@@ -93,7 +93,7 @@ import java.util.List;
*/
@Command(
name = "overlord",
- description = "Runs an Overlord node, see http://druid.io/docs/0.6.24/Indexing-Service.html for a description"
+ description = "Runs an Overlord node, see http://druid.io/docs/0.6.26/Indexing-Service.html for a description"
)
public class CliOverlord extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/CliRealtime.java b/services/src/main/java/io/druid/cli/CliRealtime.java
index 6363b2dfd4e..e01a1f62bac 100644
--- a/services/src/main/java/io/druid/cli/CliRealtime.java
+++ b/services/src/main/java/io/druid/cli/CliRealtime.java
@@ -30,7 +30,7 @@ import java.util.List;
*/
@Command(
name = "realtime",
- description = "Runs a realtime node, see http://druid.io/docs/0.6.24/Realtime.html for a description"
+ description = "Runs a realtime node, see http://druid.io/docs/0.6.26/Realtime.html for a description"
)
public class CliRealtime extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/CliRealtimeExample.java b/services/src/main/java/io/druid/cli/CliRealtimeExample.java
index 61142fe69de..659810cffe6 100644
--- a/services/src/main/java/io/druid/cli/CliRealtimeExample.java
+++ b/services/src/main/java/io/druid/cli/CliRealtimeExample.java
@@ -42,7 +42,7 @@ import java.util.concurrent.Executor;
*/
@Command(
name = "realtime",
- description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.24/Realtime.html for a description"
+ description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.26/Realtime.html for a description"
)
public class CliRealtimeExample extends ServerRunnable
{
diff --git a/services/src/main/java/io/druid/cli/convert/ConvertProperties.java b/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
index 14c7037e9e0..3cc867700d0 100644
--- a/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
+++ b/services/src/main/java/io/druid/cli/convert/ConvertProperties.java
@@ -72,7 +72,7 @@ public class ConvertProperties implements Runnable
new PrefixRename("com.metamx.emitter", "druid.emitter"),
new PrefixRename("com.metamx.druid.emitter", "druid.emitter"),
new IndexCacheConverter(),
- new Rename("druid.paths.segmentInfoCache", "druid.segmentCache.infoPath"),
+ new Rename("druid.paths.segmentInfoCache", "druid.segmentCache.infoDir"),
new Rename("com.metamx.aws.accessKey", "druid.s3.accessKey"),
new Rename("com.metamx.aws.secretKey", "druid.s3.secretKey"),
new Rename("druid.bard.maxIntervalDuration", "druid.query.chunkDuration"),