From 10a96626d47be26c95f035dbc31f61f633e60018 Mon Sep 17 00:00:00 2001 From: Brian O'Neill Date: Tue, 7 May 2013 16:53:12 -0400 Subject: [PATCH] Documentation for C* --- examples/cassandra/README.md | 32 +++++++++++++++++++ examples/cassandra/query2 | 11 ------- .../examples/RealtimeStandaloneMain.java | 10 +++--- 3 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 examples/cassandra/README.md delete mode 100644 examples/cassandra/query2 diff --git a/examples/cassandra/README.md b/examples/cassandra/README.md new file mode 100644 index 00000000000..8f34c805d00 --- /dev/null +++ b/examples/cassandra/README.md @@ -0,0 +1,32 @@ +## Introduction +Druid can use Cassandra as a deep storage mechanism. Segments and their metadata are stored in Cassandra in two tables: +`index_storage` and `descriptor_storage`. Underneath the hood, the Cassandra integration leverages Astyanax. The +index storage table is a [Chunked Object](https://github.com/Netflix/astyanax/wiki/Chunked-Object-Store) repository. It contains +compressed segments for distribution to real-time and compute nodes. Since segments can be large, the Chunked Object storage allows the integration to multi-thread +the write to Cassandra, and spreads the data across all the nodes in a cluster. The descriptor storage table is a normal C* table that +stores the segment metadata. + +## Schema +Below are the create statements for each: + + + + CREATE TABLE index_storage ( key text, chunk text, value blob, PRIMARY KEY (key, chunk)) WITH COMPACT STORAGE; + + CREATE TABLE descriptor_storage ( key varchar, lastModified timestamp, descriptor varchar, PRIMARY KEY (key) ) WITH COMPACT STORAGE; + + +## Getting Started +First create the schema above. (I use a new keyspace called `druid`) + +Then, add the following properties to your properties file to enable a Cassandra +backend.
+ + druid.pusher.cassandra=true + druid.pusher.cassandra.host=localhost:9160 + druid.pusher.cassandra.keyspace=druid + +Use the `druid-development@googlegroups.com` mailing list if you have questions, +or feel free to reach out directly: `bone@alumni.brown.edu`. + + diff --git a/examples/cassandra/query2 b/examples/cassandra/query2 deleted file mode 100644 index c53a943f899..00000000000 --- a/examples/cassandra/query2 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "queryType": "groupBy", - "dataSource": "appevents", - "granularity": "all", - "dimensions": ["appid", "event"], - "aggregations":[ - {"type":"count", "name":"eventcount"}, - {"type":"doubleSum", "fieldName":"events", "name":"eventssum"} - ], - "intervals":["2012-10-01T00:00/2020-01-01T00"] -} diff --git a/examples/src/main/java/druid/examples/RealtimeStandaloneMain.java b/examples/src/main/java/druid/examples/RealtimeStandaloneMain.java index 10edff548d7..4cb267f571f 100644 --- a/examples/src/main/java/druid/examples/RealtimeStandaloneMain.java +++ b/examples/src/main/java/druid/examples/RealtimeStandaloneMain.java @@ -45,11 +45,11 @@ public class RealtimeStandaloneMain ); // Create dummy objects for the various interfaces that interact with the DB, ZK and deep storage - //rn.setSegmentPublisher(new NoopSegmentPublisher()); - //rn.setAnnouncer(new NoopDataSegmentAnnouncer()); - //rn.setDataSegmentPusher(new NoopDataSegmentPusher()); - //rn.setServerView(new NoopServerView()); - //rn.setInventoryView(new NoopInventoryView()); + rn.setSegmentPublisher(new NoopSegmentPublisher()); + rn.setAnnouncer(new NoopDataSegmentAnnouncer()); + rn.setDataSegmentPusher(new NoopDataSegmentPusher()); + rn.setServerView(new NoopServerView()); + rn.setInventoryView(new NoopInventoryView()); Runtime.getRuntime().addShutdownHook( new Thread(