Merge branch 'master' into compute-populate-cache

2014-03-25 13:23:00 +05:30 · 2014-03-25 13:23:00 +05:30 · 6a34c09e05
parent 69e9b7a6c5 98cd83071a
commit 6a34c09e05
44 changed files with 1843 additions and 73 deletions
--- a/build.sh
+++ b/build.sh
@ -30,4 +30,4 @@ echo "For examples, see: "
 echo " "
 ls -1 examples/*/*sh
 echo " "
-echo "See also http://druid.io/docs/0.6.72"
+echo "See also http://druid.io/docs/0.6.73"
--- a/cassandra-storage/pom.xml
+++ b/cassandra-storage/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/common/pom.xml
+++ b/common/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/docs/content/Examples.md
+++ b/docs/content/Examples.md
@ -19,13 +19,13 @@ Clone Druid and build it:
 git clone https://github.com/metamx/druid.git druid
 cd druid
 git fetch --tags
-git checkout druid-0.6.72
+git checkout druid-0.6.73
 ./build.sh
 ```

 ### Downloading the DSK (Druid Standalone Kit)

-[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.72-bin.tar.gz) a stand-alone tarball and run it:
+[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.73-bin.tar.gz) a stand-alone tarball and run it:

 ``` bash
 tar -xzf druid-services-0.X.X-bin.tar.gz
--- a/docs/content/Indexing-Service-Config.md
+++ b/docs/content/Indexing-Service-Config.md
@ -66,7 +66,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/indexer

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.73"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
@ -115,7 +115,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/worker

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.72","io.druid.extensions:druid-kafka-seven:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.73","io.druid.extensions:druid-kafka-seven:0.6.73"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
--- a/docs/content/Realtime-Config.md
+++ b/docs/content/Realtime-Config.md
@ -27,7 +27,7 @@ druid.host=localhost
 druid.service=realtime
 druid.port=8083

-druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.73"]


 druid.zk.service.host=localhost
@ -76,7 +76,7 @@ druid.host=#{IP_ADDR}:8080
 druid.port=8080
 druid.service=druid/prod/realtime

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.72","io.druid.extensions:druid-kafka-seven:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.73","io.druid.extensions:druid-kafka-seven:0.6.73"]

 druid.zk.service.host=#{ZK_IPs}
 druid.zk.paths.base=/druid/prod
--- a/docs/content/Tutorial:-A-First-Look-at-Druid.md
+++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md
@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 ### Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.72-bin.tar.gz). Download this file to a directory of your choosing.
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.73-bin.tar.gz). Download this file to a directory of your choosing.

 You can extract the awesomeness within by issuing:

@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
 Not too lost so far right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.72
+cd druid-services-0.6.73
 ```

 You should see a bunch of files:
--- a/docs/content/Tutorial:-The-Druid-Cluster.md
+++ b/docs/content/Tutorial:-The-Druid-Cluster.md
@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes and external depende

 If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.

-You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.72-bin.tar.gz)
+You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.73-bin.tar.gz)

 and untar the contents within by issuing:

@ -149,7 +149,7 @@ druid.port=8081

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.73"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
@ -240,7 +240,7 @@ druid.port=8083

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.72","io.druid.extensions:druid-kafka-seven:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.73","io.druid.extensions:druid-kafka-seven:0.6.73"]

 # Change this config to db to hand off to the rest of the Druid cluster
 druid.publish.type=noop
--- a/docs/content/Tutorial:-Webstream.md
+++ b/docs/content/Tutorial:-Webstream.md
@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

 ### Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.72-bin.tar.gz)
+We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.73-bin.tar.gz).
 Download this file to a directory of your choosing.
 You can extract the awesomeness within by issuing:

@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
 Not too lost so far right? That's great! If you cd into the directory:

 ```
-cd druid-services-0.6.72
+cd druid-services-0.6.73
 ```

 You should see a bunch of files:
--- a/docs/content/Twitter-Tutorial.textile
+++ b/docs/content/Twitter-Tutorial.textile
@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.

 h3. Download a Tarball

-We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.72-bin.tar.gz.
+We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.73-bin.tar.gz.
 Download this bad boy to a directory of your choosing.

 You can extract the awesomeness within by issuing:
--- a/docs/content/index.md
+++ b/docs/content/index.md
@ -31,20 +31,20 @@ We have more details about the general design of the system and why you might wa
 When Druid?
 ----------

-* You need to do interactive, fast, exploration of large amounts of data
-* You need analytics (not key value store)
-* You have a lot of data (10s of Billions of events added per day, 10s of TB of data added per day)
-* You want to do your analysis on data as it’s happening (realtime)
-* Your store needs to be always-on, 24x7x365 and years into the future.
+* You need to do interactive, fast exploration on large amounts of data
+* You need analytics (not a key-value store)
+* You have a lot of data (10s of billions of events added per day, 10s of TB of data added per day)
+* You want to do your analysis on data as it’s happening (in real time)
+* You need a data store that is always available, 24x7x365, for years into the future.


 Not Druid?
 ----------

-* The amount of data you have can easily be handled by MySql
-* Your querying for individual entries or doing lookups (Not Analytics)
-* Batch is good enough
-* Canned queries is good enough
+* The amount of data you have can easily be handled by MySQL
+* You're querying for individual entries or doing lookups (not analytics)
+* Batch ingestion is good enough
+* Canned queries are good enough
 * Downtime is no big deal


--- a/examples/config/historical/runtime.properties
+++ b/examples/config/historical/runtime.properties
@ -4,7 +4,7 @@ druid.port=8081

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.73"]

 # Dummy read only AWS account (used to download example data)
 druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
--- a/examples/config/realtime/runtime.properties
+++ b/examples/config/realtime/runtime.properties
@ -4,7 +4,7 @@ druid.port=8083

 druid.zk.service.host=localhost

-druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.72","io.druid.extensions:druid-kafka-seven:0.6.72","io.druid.extensions:druid-rabbitmq:0.6.72"]
+druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.73","io.druid.extensions:druid-kafka-seven:0.6.73","io.druid.extensions:druid-rabbitmq:0.6.73"]

 # Change this config to db to hand off to the rest of the Druid cluster
 druid.publish.type=noop
--- a/examples/pom.xml
+++ b/examples/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/hdfs-storage/pom.xml
+++ b/hdfs-storage/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/hll/pom.xml
+++ b/hll/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/indexing-hadoop/pom.xml
+++ b/indexing-hadoop/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/indexing-service/pom.xml
+++ b/indexing-service/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/kafka-eight/pom.xml
+++ b/kafka-eight/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/kafka-seven/pom.xml
+++ b/kafka-seven/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/pom.xml
+++ b/pom.xml
@ -23,14 +23,14 @@
    <groupId>io.druid</groupId>
    <artifactId>druid</artifactId>
    <packaging>pom</packaging>
-    <version>0.6.73-SNAPSHOT</version>
+    <version>0.6.74-SNAPSHOT</version>
    <name>druid</name>
    <description>druid</description>
    <scm>
        <connection>scm:git:ssh://git@github.com/metamx/druid.git</connection>
        <developerConnection>scm:git:ssh://git@github.com/metamx/druid.git</developerConnection>
        <url>http://www.github.com/metamx/druid</url>
-        <tag>druid-0.6.72-SNAPSHOT</tag>
+        <tag>druid-0.6.73-SNAPSHOT</tag>
    </scm>

    <prerequisites>
@ -94,7 +94,7 @@
            <dependency>
                <groupId>com.metamx</groupId>
                <artifactId>server-metrics</artifactId>
-                <version>0.0.5</version>
+                <version>0.0.9</version>
            </dependency>

            <dependency>
@ -548,8 +548,8 @@
                        </dependency>
                    </dependencies>
                </plugin>
-              </plugins>
-            </pluginManagement>
+            </plugins>
+        </pluginManagement>
    </build>

    <repositories>
--- a/processing/pom.xml
+++ b/processing/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/processing/src/main/java/io/druid/query/metadata/metadata/AllColumnIncluderator.java
+++ b/processing/src/main/java/io/druid/query/metadata/metadata/AllColumnIncluderator.java
@ -34,4 +34,16 @@ public class AllColumnIncluderator implements ColumnIncluderator
  {
    return ALL_CACHE_PREFIX;
  }
+
+  @Override
+  public boolean equals(Object obj)
+  {
+    return obj instanceof AllColumnIncluderator;
+  }
+
+  @Override
+  public int hashCode()
+  {
+    return AllColumnIncluderator.class.hashCode();
+  }
 }
--- a/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java
+++ b/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java
@ -21,7 +21,9 @@ package io.druid.query.metadata.metadata;

 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
 import io.druid.query.BaseQuery;
+import io.druid.query.DataSource;
 import io.druid.query.Query;
 import io.druid.query.TableDataSource;
 import io.druid.query.spec.QuerySegmentSpec;
@ -36,17 +38,18 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>

  @JsonCreator
  public SegmentMetadataQuery(
-      @JsonProperty("dataSource") String dataSource,
+      @JsonProperty("dataSource") DataSource dataSource,
      @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
      @JsonProperty("toInclude") ColumnIncluderator toInclude,
      @JsonProperty("merge") Boolean merge,
      @JsonProperty("context") Map<String, String> context
  )
  {
-    super(new TableDataSource(dataSource), querySegmentSpec, context);
+    super(dataSource, querySegmentSpec, context);

    this.toInclude = toInclude == null ? new AllColumnIncluderator() : toInclude;
    this.merge = merge == null ? false : merge;
+    Preconditions.checkArgument(dataSource instanceof TableDataSource, "SegmentMetadataQuery only supports table datasource");
  }

  @JsonProperty
@ -77,7 +80,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
  public Query<SegmentAnalysis> withOverriddenContext(Map<String, String> contextOverride)
  {
    return new SegmentMetadataQuery(
-        ((TableDataSource)getDataSource()).getName(),
+        getDataSource(),
        getQuerySegmentSpec(), toInclude, merge, computeOverridenContext(contextOverride)
    );
  }
@ -86,7 +89,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
  public Query<SegmentAnalysis> withQuerySegmentSpec(QuerySegmentSpec spec)
  {
    return new SegmentMetadataQuery(
-        ((TableDataSource)getDataSource()).getName(),
+        getDataSource(),
        spec, toInclude, merge, getContext());
  }

--- a/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java
+++ b/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java
@ -21,6 +21,7 @@ package io.druid.query.metadata;

 import com.google.common.collect.Lists;
 import com.metamx.common.guava.Sequences;
+import io.druid.query.LegacyDataSource;
 import io.druid.query.QueryRunner;
 import io.druid.query.QueryRunnerFactory;
 import io.druid.query.QueryRunnerTestHelper;
@ -98,7 +99,7 @@ public class SegmentAnalyzerTest
    );

    final SegmentMetadataQuery query = new SegmentMetadataQuery(
-        "test", QuerySegmentSpecs.create("2011/2012"), null, null, null
+        new LegacyDataSource("test"), QuerySegmentSpecs.create("2011/2012"), null, null, null
    );
    return Sequences.toList(query.run(runner), Lists.<SegmentAnalysis>newArrayList());
  }
--- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java
+++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java
@ -0,0 +1,51 @@
+/*
+ * Druid - a distributed column store.
+ * Copyright (C) 2012, 2013  Metamarkets Group Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+package io.druid.query.metadata;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.druid.jackson.DefaultObjectMapper;
+import io.druid.query.Query;
+import io.druid.query.metadata.metadata.SegmentMetadataQuery;
+import org.joda.time.Interval;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class SegmentMetadataQueryTest
+{
+  private ObjectMapper mapper = new DefaultObjectMapper();
+
+  @Test
+  public void testSerde() throws Exception
+  {
+    String queryStr = "{\n"
+                      + "  \"queryType\":\"segmentMetadata\",\n"
+                      + "  \"dataSource\":\"test_ds\",\n"
+                      + "  \"intervals\":[\"2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z\"]\n"
+                      + "}";
+    Query query = mapper.readValue(queryStr, Query.class);
+    Assert.assertTrue(query instanceof SegmentMetadataQuery);
+    Assert.assertEquals("test_ds", query.getDataSource().getName());
+    Assert.assertEquals(new Interval("2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z"), query.getIntervals().get(0));
+
+    // test serialize and deserialize
+    Assert.assertEquals(query, mapper.readValue(mapper.writeValueAsString(query), Query.class));
+
+  }
+}
--- a/publications/whitepaper/Makefile
+++ b/publications/whitepaper/Makefile
@ -1,7 +1,18 @@
-all : druid.pdf
+all : druid
+
+druid : druid.pdf
+
+sigmod : sgmd0658-yang.pdf
+
+zip : sgmd0658-yang.zip
+
+%.zip : %.pdf
+	@rm -f dummy.ps
+	@touch dummy.ps
+	zip $@ $*.pdf $*.tex dummy.ps

 clean :
-	@rm -f *.aux *.bbl *.blg *.log
+	@rm -f *.aux *.bbl *.blg *.log dummy.ps *.zip

 %.tex : %.bib

--- a/publications/whitepaper/druid.pdf
+++ b/publications/whitepaper/druid.pdf
--- a/publications/whitepaper/druid.tex
+++ b/publications/whitepaper/druid.tex
@ -1,4 +1,7 @@
-\documentclass{acm_proc_article-sp}
+\documentclass{sig-alternate-2013}
+
+\input{sig-license.tex}
+
 \usepackage{graphicx}
 \usepackage{balance}
 \usepackage{fontspec}
@ -7,28 +10,48 @@
 \graphicspath{{figures/}}
 \usepackage{enumitem}

-\hyphenation{metamarkets nelson}
+\hyphenation{metamarkets nelson cheddar}

 \begin{document}

 % ****************** TITLE ****************************************

-\title{Druid: A Real-time Analytical Data Store}
+\title{Druid}
+\subtitle{A Real-time Analytical Data Store}

 % ****************** AUTHORS **************************************

 \numberofauthors{6}
 \author{
-\alignauthor Fangjin Yang, Eric Tschetter, Xavier Léauté, Nelson Ray, Gian Merlino, Deep Ganguli\\
-\email{\{fangjin, cheddar, xavier, nelson, gian, deep\}@metamarkets.com}
+\alignauthor Fangjin Yang\\
+  \affaddr{Metamarkets Group, Inc.}\\
+  % \affaddr{625 2nd St Suite 230}\\
+  % \affaddr{San Francisco, CA 94107}\\
+  \email{fangjin@metamarkets.com}
+\alignauthor Eric Tschetter\\
+  \email{echeddar@gmail.com}
+\alignauthor Xavier Léauté\\
+  \affaddr{Metamarkets Group, Inc.}\\
+  \email{xavier@metamarkets.com}
+\and
+\alignauthor Nelson Ray\\
+  \email{ncray86@gmail.com}
+\alignauthor Gian Merlino\\
+  \affaddr{Metamarkets Group, Inc.}\\
+  \email{gian@metamarkets.com}
+\alignauthor Deep Ganguli\\
+  \affaddr{Metamarkets Group, Inc.}\\
+  \email{deep@metamarkets.com}
 }
 \date{21 March 2013}

+% ****************** AUTHORS **************************************
+
 \maketitle

 \begin{abstract} 
 Druid is an open
-source\footnote{\href{https://github.com/metamx/druid}{https://github.com/metamx/druid}}
+source\footnote{\href{http://druid.io/}{http://druid.io/} \href{https://github.com/metamx/druid}{https://github.com/metamx/druid}}
 data store designed for real-time exploratory analytics on large data sets.
 The system combines a column-oriented storage layout, a distributed,
 shared-nothing architecture, and an advanced indexing structure to allow for
@ -37,13 +60,19 @@ this paper, we describe Druid's architecture, and detail how it supports fast
 aggregations, flexible filters, and low latency data ingestion.  
 \end{abstract}

+% A category with the (minimum) three required fields
+\category{H.2.4}{Database Management}{Systems}[Distributed databases]
+% \category{D.2.8}{Software Engineering}{Metrics}[complexity measures, performance measures]
+\keywords{distributed; real-time; fault-tolerant; analytics; OLAP; columnar}
+
+
 \section{Introduction} 
 In recent years, the proliferation of internet technology has
 created a surge in machine-generated events.  Individually, these
 events contain minimal useful information and are of low value.  Given the
 time and resources required to extract meaning from large collections of
 events, many companies were willing to discard this data instead.  Although
-infrastructure has been built to handle event based data (e.g. IBM's
+infrastructure has been built to handle event-based data (e.g. IBM's
 Netezza\cite{singh2011introduction}, HP's Vertica\cite{bear2012vertica}, and EMC's
 Greenplum\cite{miner2012unified}), they are largely sold at high price points
 and are only targeted towards those companies who can afford the offering.
@ -146,7 +175,7 @@ Relational Database Management Systems (RDBMS) and NoSQL key/value stores were
 unable to provide a low latency data ingestion and query platform for
 interactive applications \cite{tschetter2011druid}. In the early days of
 Metamarkets, we were focused on building a hosted dashboard that would allow
-users to arbitrary explore and visualize event streams.  The data store
+users to arbitrarily explore and visualize event streams.  The data store
 powering the dashboard needed to return queries fast enough that the data
 visualizations built on top of it could provide users with an interactive
 experience. 
@ -198,7 +227,7 @@ Figure~\ref{fig:cluster}.
 Real-time nodes encapsulate the functionality to ingest and query event
 streams. Events indexed via these nodes are immediately available for querying.
 The nodes are only concerned with events for some small time range and
-periodically hand off immutable batches of events they've collected over this
+periodically hand off immutable batches of events they have collected over this
 small time range to other nodes in the Druid cluster that are specialized in
 dealing with batches of immutable events. Real-time nodes leverage Zookeeper
 \cite{hunt2010zookeeper} for coordination with the rest of the Druid cluster.
@ -789,7 +818,7 @@ approximately 10TB of segments loaded. Collectively,
 there are about 50 billion Druid rows in this tier. Results for
 every data source are not shown.

-\item The hot tier uses Xeon E5-2670 processors and consists of 1302 processing
+\item The hot tier uses Intel Xeon E5-2670 processors and consists of 1302 processing
 threads and 672 total cores (hyperthreaded).

 \item A memory-mapped storage engine was used (the machine was configured to
@ -828,7 +857,7 @@ comparison, we also provide the results of the same queries using MySQL using th
 MyISAM engine (InnoDB was slower in our experiments).

 We selected MySQL to benchmark
-against because of its universal popularity. We choose not to select another
+against because of its universal popularity. We chose not to select another
 open source column store because we were not confident we could correctly tune
 it for optimal performance.

@ -933,9 +962,9 @@ running an Amazon \texttt{cc2.8xlarge} instance.
 \label{fig:ingestion_rate}
 \end{figure}

-The latency measurements we presented are sufficient to address the our stated
+The latency measurements we presented are sufficient to address the stated
 problems of interactivity. We would prefer the variability in the latencies to
-be less. It is still very possible to possible to decrease latencies by adding
+be less. It is still very possible to decrease latencies by adding
 additional hardware, but we have not chosen to do so because infrastructure
 costs are still a consideration to us.

@ -1017,7 +1046,7 @@ data centers as well. The tier configuration in Druid coordinator nodes allow
 for segments to be replicated across multiple tiers. Hence, segments can be
 exactly replicated across historical nodes in multiple data centers.
 Similarly, query preference can be assigned to different tiers. It is possible
-to have nodes in one data center act as a primary cluster (and recieve all
+to have nodes in one data center act as a primary cluster (and receive all
 queries) and have a redundant cluster in another data center. Such a setup may
 be desired if one data center is situated much closer to users. 

--- a/publications/whitepaper/sgmd0658-yang.bib
+++ b/publications/whitepaper/sgmd0658-yang.bib
@ -0,0 +1 @@
+druid.bib
--- a/publications/whitepaper/sgmd0658-yang.tex
+++ b/publications/whitepaper/sgmd0658-yang.tex
@ -0,0 +1 @@
+druid.tex
--- a/publications/whitepaper/sig-alternate-2013.cls
+++ b/publications/whitepaper/sig-alternate-2013.cls
--- a/publications/whitepaper/sig-license.tex
+++ b/publications/whitepaper/sig-license.tex
@ -0,0 +1,12 @@
+\newfont{\mycrnotice}{ptmr8t at 7pt}
+\newfont{\myconfname}{ptmri8t at 7pt}
+\let\crnotice\mycrnotice%
+\let\confname\myconfname%
+\permission{Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.}
+\conferenceinfo{SIGMOD'14,}{June 22--27, 2014, Snowbird, UT, USA. \\
+{\mycrnotice{Copyright is held by the owner/author(s). Publication rights licensed to ACM.}}}
+\copyrightetc{ACM \the\acmcopyr}
+\crdata{978-1-4503-2376-5/14/06\ ...\$15.00.\\
+Include the http://DOI string/url which is specific for your submission and included in the ACM rightsreview confirmation email upon completing your ACM form}
+\clubpenalty=10000
+\widowpenalty = 10000
--- a/rabbitmq/pom.xml
+++ b/rabbitmq/pom.xml
@ -9,7 +9,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/s3-extensions/pom.xml
+++ b/s3-extensions/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/server/pom.xml
+++ b/server/pom.xml
@ -28,7 +28,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/services/pom.xml
+++ b/services/pom.xml
@ -27,7 +27,7 @@
    <parent>
        <groupId>io.druid</groupId>
        <artifactId>druid</artifactId>
-        <version>0.6.73-SNAPSHOT</version>
+        <version>0.6.74-SNAPSHOT</version>
    </parent>

    <dependencies>
--- a/services/src/main/java/io/druid/cli/CliBroker.java
+++ b/services/src/main/java/io/druid/cli/CliBroker.java
@ -54,7 +54,7 @@ import java.util.List;
 */
@Command(
    name = "broker",
-    description = "Runs a broker node, see http://druid.io/docs/0.6.72/Broker.html for a description"
+    description = "Runs a broker node, see http://druid.io/docs/0.6.73/Broker.html for a description"
 )
 public class CliBroker extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliCoordinator.java
+++ b/services/src/main/java/io/druid/cli/CliCoordinator.java
@ -66,7 +66,7 @@ import java.util.List;
 */
@Command(
    name = "coordinator",
-    description = "Runs the Coordinator, see http://druid.io/docs/0.6.72/Coordinator.html for a description."
+    description = "Runs the Coordinator, see http://druid.io/docs/0.6.73/Coordinator.html for a description."
 )
 public class CliCoordinator extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
+++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java
@ -41,7 +41,7 @@ import java.util.List;
 */
@Command(
    name = "hadoop",
-    description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.72/Batch-ingestion.html for a description."
+    description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.73/Batch-ingestion.html for a description."
 )
 public class CliHadoopIndexer implements Runnable
 {
--- a/services/src/main/java/io/druid/cli/CliHistorical.java
+++ b/services/src/main/java/io/druid/cli/CliHistorical.java
@ -45,7 +45,7 @@ import java.util.List;
 */
@Command(
    name = "historical",
-    description = "Runs a Historical node, see http://druid.io/docs/0.6.72/Historical.html for a description"
+    description = "Runs a Historical node, see http://druid.io/docs/0.6.73/Historical.html for a description"
 )
 public class CliHistorical extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliOverlord.java
+++ b/services/src/main/java/io/druid/cli/CliOverlord.java
@ -93,7 +93,7 @@ import java.util.List;
 */
@Command(
    name = "overlord",
-    description = "Runs an Overlord node, see http://druid.io/docs/0.6.72/Indexing-Service.html for a description"
+    description = "Runs an Overlord node, see http://druid.io/docs/0.6.73/Indexing-Service.html for a description"
 )
 public class CliOverlord extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliRealtime.java
+++ b/services/src/main/java/io/druid/cli/CliRealtime.java
@ -30,7 +30,7 @@ import java.util.List;
 */
@Command(
    name = "realtime",
-    description = "Runs a realtime node, see http://druid.io/docs/0.6.72/Realtime.html for a description"
+    description = "Runs a realtime node, see http://druid.io/docs/0.6.73/Realtime.html for a description"
 )
 public class CliRealtime extends ServerRunnable
 {
--- a/services/src/main/java/io/druid/cli/CliRealtimeExample.java
+++ b/services/src/main/java/io/druid/cli/CliRealtimeExample.java
@ -42,7 +42,7 @@ import java.util.concurrent.Executor;
 */
@Command(
    name = "realtime",
-    description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.72/Realtime.html for a description"
+    description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.73/Realtime.html for a description"
 )
 public class CliRealtimeExample extends ServerRunnable
 {