Merge branch 'master' into fix-config

This commit is contained in:
fjy 2013-09-27 17:17:46 -07:00
commit 4a7a28a7f6
98 changed files with 2347 additions and 10559 deletions

View File

@ -2,4 +2,4 @@ name: Your New Jekyll Site
pygments: true
markdown: redcarpet
redcarpet:
extensions: ["no_intra_emphasis", "fenced_code_blocks", "autolink", "tables", "with_toc_data"]
extensions: ["no_intra_emphasis", "fenced_code_blocks", "disable_indented_code_blocks", "tables", "with_toc_data"]

View File

@ -0,0 +1,51 @@
<!-- Start page_footer include -->
<div class="container">
<footer>
<div class="container">
<hr>
<div class="row">
<div class="col-md-4">
<address>
<strong>CONTACT US</strong>
<a href="mailto:info@druid.io">info@druid.io</a>
</address>
<address>
<strong>Metamarkets</strong>
625 2nd Street, Suite #230<br>
San Francisco, CA 94107<br>
<div class="soc">
<a href="https://twitter.com/druidio"></a>
<a href="https://github.com/metamx/druid" class="github"></a>
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
</div>
</address>
</div>
<ul class="col-md-4 list-unstyled">
<li><a href="/"><strong>DRUID</strong></a></li>
<li><a href="/druid.html">What is Druid?</a></li>
<li><a href="/downloads.html">Downloads</a></li>
<li><a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation</a></li>
</ul>
<ul class="col-md-4 list-unstyled">
<li><a href="/community.html"><strong>SUPPORT</strong></a></li>
<li><a href="/community.html">Community</a></li>
<li><a href="/faq.html">FAQ</a></li>
<li><a href="/licensing.html">Licensing</a></li>
<li><a href="/blog"><strong>BLOG</strong></a></li>
</ul>
</div>
</div>
</footer>
</div>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-40280432-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
<!-- stop page_footer include -->

View File

@ -0,0 +1,27 @@
<!-- Start page_header include -->
<div class="navbar navbar-inverse navbar-static-top">
<div class="container druid-navbar">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/">Druid</a>
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li {% if page.id == 'home' %} class="active"{% endif %}><a href="/">Home</a></li>
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}><a href="/druid.html">What is Druid?</a></li>
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}><a href="/downloads.html">Downloads</a></li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
<li {% if page.sectionid == 'community' %} class="active"{% endif %}><a href="/community.html">Community</a></li>
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}><a href="/faq.html">FAQ</a></li>
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}><a href="/blog">Blog</a></li>
<li class="divider"></li>
</ul>
</div>
</div>
</div>
<!-- Stop page_header include -->

View File

@ -0,0 +1,19 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="druid">
<title>Druid | {{page.title}}</title>
<!-- Latest compiled and minified CSS -->
<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.css">
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
<link rel="stylesheet" href="//druid.io/css/main.css">
<link rel="stylesheet" href="//druid.io/css/header.css">
<link rel="stylesheet" href="//druid.io/css/footer.css">
<link rel="stylesheet" href="//druid.io/css/syntax.css">

View File

@ -1,147 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Druid | {{page.title}}</title>
<link rel="stylesheet" type="text/css" href="/css/bootstrap.css" media="all" />
<link rel="stylesheet" type="text/css" href="/css/bootstrap-responsive.css" media="all" />
<link rel="stylesheet" type="text/css" href="/css/syntax.css" media="all" />
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
<script src="http://code.jquery.com/jquery.js"></script>
<script src="/js/bootstrap.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="wrapper">
<header{% if page.id == 'home' %} class="index-head"{% endif %}>
<div class="container custom">
<div class="row-fluid">
<div class="span12">
<div class="navbar navbar-inverse custom">
<div class="navbar-inner">
<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
<div class="nav-collapse collapse">
<ul class="nav">
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
<a href="/druid.html">What is Druid?</a>
</li>
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
<a href="/downloads.html">Downloads</a>
</li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
</li>
<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
<a href="/community.html">Community</a>
</li>
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
<a href="/faq.html">FAQ</a>
</li>
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
<a href="/blog">Blog</a>
</li>
<li class="pull-right">
<span>BETA</span>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
{% if page.id == 'home' %}
<h3>Druid is open-source infrastructure for real&sup2;time exploratory analytics on large datasets.</h3>
<button class="btn" type="button"><a href="downloads.html">Download</a></button>
{% endif %}
</div>
</header>
<div class="container custom main-cont">
{{ content }}
</div>
</div>
<footer>
<div class="container custom">
<div class="row-fluid">
<div class="span3">
<div class="contact-item">
<span>CONTACT US</span>
<a href="mailto:info@druid.io">info@druid.io</a>
</div>
<div class="contact-item">
<span>Metamarkets</span>
625 2nd Street, Suite #230<br/>
San Francisco, CA 94107
<div class="soc">
<a href="https://twitter.com/druidio"></a>
<a href="https://github.com/metamx/druid" class="github"></a>
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
</div>
</div>
</div>
<div class="span9">
<ul class="unstyled">
<li>
<a href="/">DRUID</a>
</li>
<li>
<a href="/druid.html">What is Druid?</a>
</li>
<li>
<a href="/downloads.html">Downloads</a>
</li>
<li>
<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/community.html">SUPPORT</a>
</li>
<li>
<a href="/community.html">Community</a>
</li>
<li>
<a href="/faq.html">FAQ</a>
</li>
<li>
<a href="/licensing.html">Licensing</a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/blog">BLOG</a>
</li>
</ul>
<div class="logo-block">
<span class="logo custom">
<a href="/"></a>
</span>
<p>is an open source project sponsored by<br/> Metamarkets.</p>
</div>
</div>
</div>
</div>
</footer>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-40280432-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>

View File

@ -0,0 +1,36 @@
<!DOCTYPE html>
<html lang="en">
<head>
{% include site_head.html %}
<link rel="stylesheet" href="css/docs.css">
</head>
<body>
{% include page_header.html %}
<div class="container">
<div class="page-header">
<h1>Documentation</h1>
</div>
<div class="row">
<div class="col-md-3 toc" id="toc">
</div>
<div class="col-md-9 doc-content">
{{ content }}
</div>
</div>
</div>
{% include page_footer.html %}
<script src="http://code.jquery.com/jquery.js"></script>
<script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
<script>
$(function(){
$("#toc").load("toc.html");
});
</script>
</body>
</html>

View File

@ -1,8 +0,0 @@
---
layout: default
---
<div class="row-fluid">
{{ content }}
</div>

View File

@ -1,147 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Druid | {{page.title}}</title>
<link rel="stylesheet" type="text/css" href="css/bootstrap.css" media="all" />
<link rel="stylesheet" type="text/css" href="css/bootstrap-responsive.css" media="all" />
<link rel="stylesheet" type="text/css" href="css/syntax.css" media="all" />
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
<script src="http://code.jquery.com/jquery.js"></script>
<script src="/js/bootstrap.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="wrapper">
<header{% if page.id == 'home' %} class="index-head"{% endif %}>
<div class="container custom">
<div class="row-fluid">
<div class="span12">
<div class="navbar navbar-inverse custom">
<div class="navbar-inner">
<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
<div class="nav-collapse collapse">
<ul class="nav">
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
<a href="/druid.html">What is Druid?</a>
</li>
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
<a href="/downloads.html">Downloads</a>
</li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
</li>
<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
<a href="/community.html">Community</a>
</li>
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
<a href="/faq.html">FAQ</a>
</li>
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
<a href="/blog">Blog</a>
</li>
<li class="pull-right">
<span>BETA</span>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
{% if page.id == 'home' %}
<h3>Druid is open-source infrastructure for real&sup2;time exploratory analytics on large datasets.</h3>
<button class="btn" type="button"><a href="downloads.html">Download</a></button>
{% endif %}
</div>
</header>
<div class="container custom main-cont">
{{ content }}
</div>
</div>
<footer>
<div class="container custom">
<div class="row-fluid">
<div class="span3">
<div class="contact-item">
<span>CONTACT US</span>
<a href="mailto:info@druid.io">info@druid.io</a>
</div>
<div class="contact-item">
<span>Metamarkets</span>
625 2nd Street, Suite #230<br/>
San Francisco, CA 94107
<div class="soc">
<a href="https://twitter.com/druidio"></a>
<a href="https://github.com/metamx/druid" class="github"></a>
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
</div>
</div>
</div>
<div class="span9">
<ul class="unstyled">
<li>
<a href="/">DRUID</a>
</li>
<li>
<a href="/druid.html">What is Druid?</a>
</li>
<li>
<a href="/downloads.html">Downloads</a>
</li>
<li>
<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/community.html">SUPPORT</a>
</li>
<li>
<a href="/community.html">Community</a>
</li>
<li>
<a href="/faq.html">FAQ</a>
</li>
<li>
<a href="/licensing.html">Licensing</a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/blog">BLOG</a>
</li>
</ul>
<div class="logo-block">
<span class="logo custom">
<a href="/"></a>
</span>
<p>is an open source project sponsored by<br/> Metamarkets.</p>
</div>
</div>
</div>
</div>
</footer>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-40280432-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>

View File

@ -1,11 +0,0 @@
---
layout: default
---
<div class="row-fluid">
<div class="span10 offset1{% if page.id != 'home' %} simple-page{% endif %}{% if page.sectionid == 'faq' %} faq-page{% endif %}">
{{ content }}
</div>
</div>

View File

@ -1,44 +0,0 @@
---
layout: default
sectionid: blog
---
<div class="row-fluid">
<div class="span4 recent">
<h3>Recent posts</h3>
<ul class="unstyled">
{% for post in site.posts limit: 5 %}
<li{% if page.title == post.title %} class="active"{% endif %}><a href="{{ post.url }}">{{ post.title }}</a></li>
{% endfor %}
</ul>
</div>
<div class="span8 simple-page">
<div class="text-item blog inner">
<h2 class="date">
<span>{{ page.title }}</span>
<span>{{ page.date | date: "%B %e, %Y" }} · {{ page.author | upcase }}</span>
</h2>
{% if page.image %}<img src="{{ page.image }}" alt="{{ page.title }}" class="text-img" />{% endif %}
{{ content }}
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'druidio'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
</div>
</div>
</div>

View File

@ -1,20 +1,26 @@
---
layout: docs_default
layout: doc_page
---
Aggregations are specifications of processing over metrics available in Druid.
Available aggregations are:
### Count aggregator
`count` computes the number of rows that match the filters.
```json
{ "type" : "count", "name" : <output_name> }
```
### Sum aggregators
#### `longSum` aggregator
computes the sum of values as a 64-bit, signed integer
<code>{
"type" : "longSum",
"name" : <output_name>,
"fieldName" : <metric_name>
}</code>
```json
{ "type" : "longSum", "name" : <output_name>, "fieldName" : <metric_name> }
```
`name` output name for the summed value
`fieldName` name of the metric column to sum over
@ -23,20 +29,9 @@ computes the sum of values as a 64-bit, signed integer
Computes the sum of values as 64-bit floating point value. Similar to `longSum`
<code>{
"type" : "doubleSum",
"name" : <output_name>,
"fieldName" : <metric_name>
}</code>
### Count aggregator
`count` computes the row count that match the filters
<code>{
"type" : "count",
"name" : <output_name>,
}</code>
```json
{ "type" : "doubleSum", "name" : <output_name>, "fieldName" : <metric_name> }
```
### Min / Max aggregators
@ -44,21 +39,17 @@ Computes the sum of values as 64-bit floating point value. Similar to `longSum`
`min` computes the minimum metric value
<code>{
"type" : "min",
"name" : <output_name>,
"fieldName" : <metric_name>
}</code>
```json
{ "type" : "min", "name" : <output_name>, "fieldName" : <metric_name> }
```
#### `max` aggregator
`max` computes the maximum metric value
<code>{
"type" : "max",
"name" : <output_name>,
"fieldName" : <metric_name>
}</code>
```json
{ "type" : "max", "name" : <output_name>, "fieldName" : <metric_name> }
```
### JavaScript aggregator
@ -66,25 +57,27 @@ Computes an arbitrary JavaScript function over a set of columns (both metrics an
All JavaScript functions must return numerical values.
<code>{
"type": "javascript",
"name": "<output_name>",
"fieldNames" : [ <column1>, <column2>, ... ],
"fnAggregate" : "function(current, column1, column2, ...) {
<updates partial aggregate (current) based on the current row values>
return <updated partial aggregate>
}"
"fnCombine" : "function(partialA, partialB) { return <combined partial results>; }"
"fnReset" : "function() { return <initial value>; }"
}</code>
```json
{ "type": "javascript", "name": "<output_name>",
"fieldNames" : [ <column1>, <column2>, ... ],
"fnAggregate" : "function(current, column1, column2, ...) {
<updates partial aggregate (current) based on the current row values>
return <updated partial aggregate>
}",
"fnCombine" : "function(partialA, partialB) { return <combined partial results>; }",
"fnReset" : "function() { return <initial value>; }"
}
```
**Example**
<code>{
"type": "javascript",
"name": "sum(log(x)/y) + 10",
"fieldNames": ["x", "y"],
"fnAggregate" : "function(current, a, b) { return current + (Math.log(a) * b); }"
"fnCombine" : "function(partialA, partialB) { return partialA + partialB; }"
"fnReset" : "function() { return 10; }"
}</code>
```json
{
"type": "javascript",
"name": "sum(log(x)*y) + 10",
"fieldNames": ["x", "y"],
"fnAggregate" : "function(current, a, b) { return current + (Math.log(a) * b); }",
"fnCombine" : "function(partialA, partialB) { return partialA + partialB; }",
"fnReset" : "function() { return 10; }"
}
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Batch Data Ingestion
====================
@ -18,52 +18,52 @@ HadoopDruidIndexer
Located at `com.metamx.druid.indexer.HadoopDruidIndexerMain` can be run like
<code>
java -cp hadoop_config_path:druid_indexer_selfcontained_jar_path com.metamx.druid.indexer.HadoopDruidIndexerMain <config_file>
</code>
```
java -cp hadoop_config_path:druid_indexer_selfcontained_jar_path com.metamx.druid.indexer.HadoopDruidIndexerMain <config_file>
```
The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals) of the data you are processing. The config\_file is a path to a file (the “specFile”) that contains JSON and an example looks like:
The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals) of the data you are processing. The config\_file is a path to a file (the "specFile") that contains JSON and an example looks like:
<code>
{
"dataSource": "the_data_source",
"timestampColumn": "ts",
"timestampFormat": "<iso, millis, posix, auto or any Joda time format>",
"dataSpec": {
"format": "<csv, tsv, or json>",
"columns": ["ts", "column_1", "column_2", "column_3", "column_4", "column_5"],
"dimensions": ["column_1", "column_2", "column_3"]
},
"granularitySpec": {
"type":"uniform",
"intervals":["<ISO8601 interval:http://en.wikipedia.org/wiki/ISO_8601#Time_intervals>"],
"gran":"day"
},
"pathSpec": { "type": "granularity",
"dataGranularity": "hour",
"inputPath": "s3n://billy-bucket/the/data/is/here",
"filePattern": ".*" },
"rollupSpec": { "aggs": [
{ "type": "count", "name":"event_count" },
{ "type": "doubleSum", "fieldName": "column_4", "name": "revenue" },
{ "type": "longSum", "fieldName" : "column_5", "name": "clicks" }
],
"rollupGranularity": "minute"},
"workingPath": "/tmp/path/on/hdfs",
"segmentOutputPath": "s3n://billy-bucket/the/segments/go/here",
"leaveIntermediate": "false",
"partitionsSpec": {
"targetPartitionSize": 5000000
},
"updaterJobSpec": {
"type":"db",
"connectURI":"jdbc:mysql://localhost:7980/test_db",
"user":"username",
"password":"passmeup",
"segmentTable":"segments"
}
}
</code>
```
{
"dataSource": "the_data_source",
"timestampColumn": "ts",
"timestampFormat": "<iso, millis, posix, auto or any Joda time format>",
"dataSpec": {
"format": "<csv, tsv, or json>",
"columns": ["ts", "column_1", "column_2", "column_3", "column_4", "column_5"],
"dimensions": ["column_1", "column_2", "column_3"]
},
"granularitySpec": {
"type":"uniform",
"intervals":["<ISO8601 interval:http://en.wikipedia.org/wiki/ISO_8601#Time_intervals>"],
"gran":"day"
},
"pathSpec": { "type": "granularity",
"dataGranularity": "hour",
"inputPath": "s3n://billy-bucket/the/data/is/here",
"filePattern": ".*" },
"rollupSpec": { "aggs": [
{ "type": "count", "name":"event_count" },
{ "type": "doubleSum", "fieldName": "column_4", "name": "revenue" },
{ "type": "longSum", "fieldName" : "column_5", "name": "clicks" }
],
"rollupGranularity": "minute"},
"workingPath": "/tmp/path/on/hdfs",
"segmentOutputPath": "s3n://billy-bucket/the/segments/go/here",
"leaveIntermediate": "false",
"partitionsSpec": {
"targetPartitionSize": 5000000
},
"updaterJobSpec": {
"type":"db",
"connectURI":"jdbc:mysql://localhost:7980/test_db",
"user":"username",
"password":"passmeup",
"segmentTable":"segments"
}
}
```
### Hadoop indexer config
@ -100,10 +100,12 @@ Is a type of data loader that expects data to be laid out in a specific path for
For example, if the sample config were run with the interval 2012-06-01/2012-06-02, it would expect data at the paths
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=00
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=01
...
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=23
```
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=00
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=01
...
s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=23
```
### Rollup specification
@ -116,7 +118,7 @@ The indexing process has the ability to roll data up as it processes the incomin
### Partitioning specification
Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension “last\_name” into two segments: one with all values from A-M and one with all values from N-Z.
Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.
To use this option, the indexer must be given a target partition size. It can then find a good set of partition ranges on its own.
@ -132,7 +134,7 @@ This is a specification of the properties that tell the job how to update metada
|property|description|required?|
|--------|-----------|---------|
|type|“db” is the only value available|yes|
|type|"db" is the only value available|yes|
|connectURI|a valid JDBC url to MySQL|yes|
|user|username for db|yes|
|password|password for db|yes|

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
# Booting a Single Node Cluster #
@ -27,4 +27,4 @@ cd whirr
git checkout trunk
mvn clean install -Dmaven.test.failure.ignore=true -Dcheckstyle.skip
bin/whirr launch-cluster --config recipes/druid.properties
```
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Broker
======

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
### Clone and Build from Source
@ -20,5 +20,6 @@ LICENSE client eclipse_formatting.xml index-common merger realtime
```
You can find the example executables in the examples/bin directory:
* run_example_server.sh
* run_example_client.sh

View File

@ -1,114 +0,0 @@
---
layout: default
---
A Druid cluster consists of various node types that need to be set up depending on your use case. See our [Design](Design.html) docs for a description of the different node types.
Setup Scripts
-------------
One of our community members, [housejester](https://github.com/housejester/), contributed some scripts to help with setting up a cluster. Checkout the [github](https://github.com/housejester/druid-test-harness) and [wiki](https://github.com/housejester/druid-test-harness/wiki/Druid-Test-Harness).
Minimum Physical Layout: Absolute Minimum
-----------------------------------------
As a special case, the absolute minimum setup is one of the standalone examples for realtime ingestion and querying; see [Examples](Examples.html) that can easily run on one machine with one core and 1GB RAM. This layout can be set up to try some basic queries with Druid.
Minimum Physical Layout: Experimental Testing with 4GB of RAM
-------------------------------------------------------------
This layout can be used to load some data from deep storage onto a Druid compute node for the first time. A minimal physical layout for a 1 or 2 core machine with 4GB of RAM is:
1. node1: [Master](Master.html) + metadata service + zookeeper + [Compute](Compute.html)
2. transient nodes: indexer
This setup is only reasonable to prove that a configuration works. It would not be worthwhile to use this layout for performance measurement.
Comfortable Physical Layout: Pilot Project with Multiple Machines
-----------------------------------------------------------------
*The machine size “flavors” use AWS/EC2 terminology for descriptive purposes only and are not meant to imply that AWS/EC2 is required or recommended. Another cloud provider or your own hardware can also work.*
A minimal physical layout not constrained by cores that demonstrates parallel querying and realtime, using AWS-EC2 “small”/m1.small (one core, with 1.7GB of RAM) or larger, no realtime, is:
1. node1: [Master](Master.html) (m1.small)
2. node2: metadata service (m1.small)
3. node3: zookeeper (m1.small)
4. node4: [Broker](Broker.html) (m1.small or m1.medium or m1.large)
5. node5: [Compute](Compute.html) (m1.small or m1.medium or m1.large)
6. node6: [Compute](Compute.html) (m1.small or m1.medium or m1.large)
7. node7: [Realtime](Realtime.html) (m1.small or m1.medium or m1.large)
8. transient nodes: indexer
This layout naturally lends itself to adding more RAM and core to Compute nodes, and to adding many more Compute nodes. Depending on the actual load, the Master, metadata server, and Zookeeper might need to use larger machines.
High Availability Physical Layout
---------------------------------
*The machine size “flavors” use AWS/EC2 terminology for descriptive purposes only and are not meant to imply that AWS/EC2 is required or recommended. Another cloud provider or your own hardware can also work.*
An HA layout allows full rolling restarts and heavy volume:
1. node1: [Master](Master.html) (m1.small or m1.medium or m1.large)
2. node2: [Master](Master.html) (m1.small or m1.medium or m1.large) (backup)
3. node3: metadata service (c1.medium or m1.large)
4. node4: metadata service (c1.medium or m1.large) (backup)
5. node5: zookeeper (c1.medium)
6. node6: zookeeper (c1.medium)
7. node7: zookeeper (c1.medium)
8. node8: [Broker](Broker.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
9. node9: [Broker](Broker.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge) (backup)
10. node10: [Compute](Compute.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
11. node11: [Compute](Compute.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
12. node12: [Realtime](Realtime.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
13. transient nodes: indexer
Sizing for Cores and RAM
------------------------
The Compute and Broker nodes will use as many cores as are available, depending on usage, so it is best to keep these on dedicated machines. The upper limit of effectively utilized cores is not well characterized yet and would depend on types of queries, query load, and the schema. Compute daemons should have a heap size of at least 1GB per core for normal usage, but could be squeezed into a smaller heap for testing. Since in-memory caching is essential for good performance, even more RAM is better. Broker nodes will use RAM for caching, so they do more than just route queries.
The effective utilization of cores by Zookeeper, MySQL, and Master nodes is likely to be between 1 and 2 for each process/daemon, so these could potentially share a machine with lots of cores. These daemons work with a heap size between 500MB and 1GB.
Storage
-------
Indexed segments should be kept in a permanent store accessible by all nodes like AWS S3 or HDFS or equivalent. Currently Druid supports S3, but this will be extended soon.
Local disk (“ephemeral” on AWS EC2) for caching is recommended over network mounted storage (example of mounted: AWS EBS, Elastic Block Store) in order to avoid network delays during times of heavy usage. If your data center is suitably provisioned for networked storage, perhaps with separate LAN/NICs just for storage, then mounted might work fine.
Setup
-----
Setting up a cluster is essentially just firing up all of the nodes you want with the proper [configuration](configuration.html). One thing to be aware of is that there are a few properties in the configuration that potentially need to be set individually for each process:
<code>
druid.server.type=historical|realtime
druid.host=someHostOrIPaddrWithPort
druid.port=8080
</code>
`druid.server.type` should be set to “historical” for your compute nodes and realtime for the realtime nodes. The master will only assign segments to a “historical” node and the broker has some intelligence around its ability to cache results when talking to a realtime node. This does not need to be set for the master or the broker.
`druid.host` should be set to the hostname and port that can be used to talk to the given server process. Basically, someone should be able to send a request to http://\${druid.host}/ and actually talk to the process.
`druid.port` should be set to the port that the server should listen on. In the vast majority of cases, this port should be the same as what is on `druid.host`.
Build/Run
---------
The simplest way to build and run from the repository is to run `mvn package` from the base directory and then take `druid-services/target/druid-services-*-selfcontained.jar` and push that around to your machines; the jar does not need to be expanded, and since it contains the main() methods for each kind of service, it is **not** invoked with `java -jar`. It can be run from a normal java command-line by just including it on the classpath and then giving it the main class that you want to run. For example, one instance of the Compute node/service can be started like this:
<pre>
<code>
java -Duser.timezone=UTC -Dfile.encoding=UTF-8 -cp compute/:druid-services/target/druid-services-*-selfcontained.jar com.metamx.druid.http.ComputeMain
</code>
</pre>
The following table shows the possible services and fully qualified class for main().
|service|main class|
|-------|----------|
|[Realtime](Realtime.html)|com.metamx.druid.realtime.RealtimeMain|
|[Master](Master.html)|com.metamx.druid.http.MasterMain|
|[Broker](Broker.html)|com.metamx.druid.http.BrokerMain|
|[Compute](Compute.html)|com.metamx.druid.http.ComputeMain|

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Compute
=======

View File

@ -1,15 +1,14 @@
---
layout: default
layout: doc_page
---
Concepts and Terminology
========================
- **Aggregators:** A mechanism for combining records during realtime incremental indexing, Hadoop batch indexing, and in queries.
- **DataSource:** A table-like view of data; specified in a “specFile” and in a query.
- **Granularity:** The time interval corresponding to aggregation by time.
- The *indexGranularity* setting in a schema is used to aggregate input (ingest) records within an interval into a single output (internal) record.
- The *segmentGranularity* is the interval specifying how internal records are stored together in a single file.
- **Segment:** A collection of (internal) records that are stored and processed together.
- **Shard:** A unit of partitioning data across machine. TODO: clarify; by time or other dimensions?
- **specFile** is specification for services in JSON format; see [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html)
* **Aggregators**: A mechanism for combining records during realtime incremental indexing, Hadoop batch indexing, and in queries.
* **DataSource**: A table-like view of data; specified in a "specFile" and in a query.
* **Granularity**: The time interval corresponding to aggregation by time.
* **indexGranularity**: specifies the granularity used to bucket timestamps within a segment.
* **segmentGranularity**: specifies the granularity of the segment, i.e. the amount of time a segment will represent
* **Segment**: A collection of (internal) records that are stored and processed together.
* **Shard**: A sub-partition of the data in a segment. It is possible to have multiple segments represent all data for a given segmentGranularity.
* **specFile**: A specification for services in JSON format; see [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html)

View File

@ -1,7 +1,7 @@
---
layout: default
layout: doc_page
---
This describes the basic server configuration that is loaded by all the server processes; the same file is loaded by all. See also the json “specFile” descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).
This describes the basic server configuration that is loaded by all the server processes; the same file is loaded by all. See also the json "specFile" descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).
JVM Configuration Best Practices
================================
@ -17,63 +17,63 @@ Basic Service Configuration
Configuration of the various nodes is done via Java properties. These can either be provided as `-D` system properties on the java command line or they can be passed in via a file called `runtime.properties` that exists on the classpath. Note: as a future item, I'd like to consolidate all of the various configuration into yaml/JSON based configuration files.
The periodic time intervals (like “PT1M”) are [ISO8601 intervals](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals)
The periodic time intervals (like "PT1M") are [ISO8601 intervals](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals)
An example runtime.properties is as follows:
<code>
# S3 access
com.metamx.aws.accessKey=<S3 access key>
com.metamx.aws.secretKey=<S3 secret_key>
```
# S3 access
com.metamx.aws.accessKey=<S3 access key>
com.metamx.aws.secretKey=<S3 secret_key>
# thread pool size for servicing queries
druid.client.http.connections=30
# thread pool size for servicing queries
druid.client.http.connections=30
# JDBC connection string for metadata database
druid.database.connectURI=
druid.database.user=user
druid.database.password=password
# time between polling for metadata database
druid.database.poll.duration=PT1M
druid.database.segmentTable=prod_segments
# JDBC connection string for metadata database
druid.database.connectURI=
druid.database.user=user
druid.database.password=password
# time between polling for metadata database
druid.database.poll.duration=PT1M
druid.database.segmentTable=prod_segments
# Path on local FS for storage of segments; dir will be created if needed
druid.paths.indexCache=/tmp/druid/indexCache
# Path on local FS for storage of segment metadata; dir will be created if needed
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
# Path on local FS for storage of segments; dir will be created if needed
druid.paths.indexCache=/tmp/druid/indexCache
# Path on local FS for storage of segment metadata; dir will be created if needed
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
druid.request.logging.dir=/tmp/druid/log
druid.request.logging.dir=/tmp/druid/log
druid.server.maxSize=300000000000
druid.server.maxSize=300000000000
# ZK quorum IPs
druid.zk.service.host=
# ZK path prefix for Druid-usage of zookeeper, Druid will create multiple paths underneath this znode
druid.zk.paths.base=/druid
# ZK path for discovery, the only path not to default to anything
druid.zk.paths.discoveryPath=/druid/discoveryPath
# ZK quorum IPs
druid.zk.service.host=
# ZK path prefix for Druid-usage of zookeeper, Druid will create multiple paths underneath this znode
druid.zk.paths.base=/druid
# ZK path for discovery, the only path not to default to anything
druid.zk.paths.discoveryPath=/druid/discoveryPath
# the host:port as advertised to clients
druid.host=someHostOrIPaddrWithPort
# the port on which to listen, this port should line up with the druid.host value
druid.port=8080
# the host:port as advertised to clients
druid.host=someHostOrIPaddrWithPort
# the port on which to listen, this port should line up with the druid.host value
druid.port=8080
com.metamx.emitter.logging=true
com.metamx.emitter.logging.level=debug
com.metamx.emitter.logging=true
com.metamx.emitter.logging.level=debug
druid.processing.formatString=processing_%s
druid.processing.numThreads=3
druid.processing.formatString=processing_%s
druid.processing.numThreads=3
druid.computation.buffer.size=100000000
druid.computation.buffer.size=100000000
# S3 dest for realtime indexer
druid.pusher.s3.bucket=
druid.pusher.s3.baseKey=
# S3 dest for realtime indexer
druid.pusher.s3.bucket=
druid.pusher.s3.baseKey=
druid.bard.cache.sizeInBytes=40000000
druid.master.merger.service=blah_blah
</code>
druid.bard.cache.sizeInBytes=40000000
druid.master.merger.service=blah_blah
```
Configuration groupings
-----------------------
@ -91,7 +91,7 @@ These properties are for connecting with S3 and using it to pull down segments.
### JDBC connection
These properties specify the jdbc connection and other configuration around the “segments table” database. The only processes that connect to the DB with these properties are the [Master](Master.html) and [Indexing service](Indexing-service.html). This is tested on MySQL.
These properties specify the jdbc connection and other configuration around the "segments table" database. The only processes that connect to the DB with these properties are the [Master](Master.html) and [Indexing service](Indexing-service.html). This is tested on MySQL.
|Property|Description|Default|
|--------|-----------|-------|
@ -142,7 +142,7 @@ These are properties that the compute nodes use
|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.maxSize`|The maximum number of bytes worth of segment that the node wants assigned to it. This is not a limit that the compute nodes actually enforce, they just publish it to the master and trust the master to do the right thing|none|
|`druid.server.type`|Specifies the type of the node. This is published via ZK and depending on the value the node will be treated specially by the Master/Broker. Allowed values are “realtime” or “historical”. This is a configuration parameter because the plan is to allow for a more configurable cluster composition. At the current time, all realtime nodes should just be “realtime” and all compute nodes should just be “compute”|none|
|`druid.server.type`|Specifies the type of the node. This is published via ZK and depending on the value the node will be treated specially by the Master/Broker. Allowed values are "realtime" or "historical". This is a configuration parameter because the plan is to allow for a more configurable cluster composition. At the current time, all realtime nodes should just be "realtime" and all compute nodes should just be "historical"|none|
### Emitter Properties
@ -150,7 +150,7 @@ The Druid servers emit various metrics and alerts via something we call an [Emit
|Property|Description|Default|
|--------|-----------|-------|
|`com.metamx.emitter.logging`|Set to “true” to use the logging emitter|none|
|`com.metamx.emitter.logging`|Set to "true" to use the logging emitter|none|
|`com.metamx.emitter.logging.level`|Sets the level to log at|debug|
|`com.metamx.emitter.logging.class`|Sets the class to log at|com.metamx.emitter.core.LoggingEmitter|

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
If you are interested in contributing to the code, we accept [pull requests](https://help.github.com/articles/using-pull-requests). Note: we have only just completed decoupling our Metamarkets-specific code from the code base and we took some short-cuts in interface design to make it happen. So, there are a number of interfaces that exist right now which are likely to be in flux. If you are embedding Druid in your system, it will be safest for the time being to only extend/implement interfaces that this wiki describes, as those are intended as stable (unless otherwise mentioned).

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Deep storage is where segments are stored. It is a storage mechanism that Druid does not provide. This deep storage infrastructure defines the level of durability of your data: as long as Druid nodes can see this storage infrastructure and get at the segments stored on it, you will not lose data no matter how many Druid nodes you lose. If segments disappear from this storage layer, then you will lose whatever data those segments represented.
@ -11,10 +11,12 @@ S3-compatible deep storage is basically either S3 or something like riak-cs whic
S3 configuration parameters are
com.metamx.aws.accessKey=<S3 access key>
com.metamx.aws.secretKey=<S3 secret_key>
druid.pusher.s3.bucket=<bucket to store in>
druid.pusher.s3.baseKey=<base key prefix to use, i.e. what directory>
```
com.metamx.aws.accessKey=<S3 access key>
com.metamx.aws.secretKey=<S3 secret_key>
druid.pusher.s3.bucket=<bucket to store in>
druid.pusher.s3.baseKey=<base key prefix to use, i.e. what directory>
```
## HDFS
@ -22,8 +24,10 @@ As of 0.4.0, HDFS can be used for storage of segments as well.
In order to use hdfs for deep storage, you need to set the following configuration on your realtime nodes.
druid.pusher.hdfs=true
druid.pusher.hdfs.storageDirectory=<directory for storing segments>
```
druid.pusher.hdfs=true
druid.pusher.hdfs.storageDirectory=<directory for storing segments>
```
If you are using the Hadoop indexer, set your output directory to be a location on Hadoop and it will work
@ -34,9 +38,11 @@ A local mount can be used for storage of segments as well. This allows you to u
In order to use a local mount for deep storage, you need to set the following configuration on your realtime nodes.
druid.pusher.local=true
druid.pusher.local.storageDirectory=<directory for storing segments>
```
druid.pusher.local=true
druid.pusher.local.storageDirectory=<directory for storing segments>
```
Note that you should generally set `druid.pusher.local.storageDirectory` to something different from `druid.paths.indexCache`.
If you are using the Hadoop indexer in local mode, then just give it a local file as your output directory and it will work.
If you are using the Hadoop indexer in local mode, then just give it a local file as your output directory and it will work.

View File

@ -1,12 +1,13 @@
---
layout: default
layout: doc_page
---
For a comprehensive look at the architecture of Druid, read the [White Paper](http://static.druid.io/docs/druid.pdf).
What is Druid?
==============
Druid is a system built to allow fast (“real-time”) access to large sets of seldom-changing data. It was designed with the intent of being a service and maintaining 100% uptime in the face of code deployments, machine failures and other eventualities of a production system. It can be useful for back-office use cases as well, but design decisions were made explicitly targetting an always-up service.
Druid is a system built to allow fast ("real-time") access to large sets of seldom-changing data. It was designed with the intent of being a service and maintaining 100% uptime in the face of code deployments, machine failures and other eventualities of a production system. It can be useful for back-office use cases as well, but design decisions were made explicitly targeting an always-up service.
Druid currently allows for single-table queries in a similar manner to [Dremel](http://research.google.com/pubs/pub36632.html) and [PowerDrill](http://www.vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf). It adds to the mix
@ -18,20 +19,21 @@ Druid currently allows for single-table queries in a similar manner to [Dremel](
As far as a comparison of systems is concerned, Druid sits in between PowerDrill and Dremel on the spectrum of functionality. It implements almost everything Dremel offers (Dremel handles arbitrary nested data structures while Druid only allows for a single level of array-based nesting) and gets into some of the interesting data layout and compression methods from PowerDrill.
Druid is a good fit for products that require real-time data ingestion of a single, large data stream. Especially if you are targetting no-downtime operation and are building your product on top of a time-oriented summarization of the incoming data stream. Druid is probably not the right solution if you care more about query flexibility and raw data access than query speed and no-downtime operation. When talking about query speed it is important to clarify what “fast” means, with Druid it is entirely within the realm of possibility (we have done it) to achieve queries that run in single-digit seconds across a 6TB data set.
Druid is a good fit for products that require real-time data ingestion of a single, large data stream, especially if you are targeting no-downtime operation and are building your product on top of a time-oriented summarization of the incoming data stream. Druid is probably not the right solution if you care more about query flexibility and raw data access than query speed and no-downtime operation. When talking about query speed it is important to clarify what "fast" means: with Druid it is entirely within the realm of possibility (we have done it) to achieve queries that run in single-digit seconds across a 6TB data set.
### Architecture
Druid is architected as a grouping of systems each with a distinct role and together they form a working system. The name comes from the Druid class in many role-playing games: it is a shape-shifter, capable of taking many different forms to fulfill various different roles in a group.
The node types that currently exist are:
\* **Compute** nodes are the workhorses that handle storage and querying on “historical” data (non-realtime)
\* **Realtime** nodes ingest data in real-time, they are in charge of listening to a stream of incoming data and making it available immediately inside the Druid system. As data they have ingested ages, they hand it off to the compute nodes.
\* **Master** nodes act as coordinators. They look over the grouping of computes and make sure that data is available, replicated and in a generally “optimal” configuration.
\* **Broker** nodes understand the topology of data across all of the other nodes in the cluster and re-write and route queries accordingly
\* **Indexer** nodes form a cluster of workers to load batch and real-time data into the system as well as allow for alterations to the data stored in the system (also known as the Indexing Service)
This separation allows each node to only care about what it is best at. By separating Compute and Realtime, we separate the memory concerns of listening on a real-time stream of data and processing it for entry into the system. By separating the Master and Broker, we separate the needs for querying from the needs for maintaining “good” data distribution across the cluster.
* **Compute** nodes are the workhorses that handle storage and querying on "historical" data (non-realtime)
* **Realtime** nodes ingest data in real-time, they are in charge of listening to a stream of incoming data and making it available immediately inside the Druid system. As data they have ingested ages, they hand it off to the compute nodes.
* **Master** nodes act as coordinators. They look over the grouping of computes and make sure that data is available, replicated and in a generally "optimal" configuration.
* **Broker** nodes understand the topology of data across all of the other nodes in the cluster and re-write and route queries accordingly
* **Indexer** nodes form a cluster of workers to load batch and real-time data into the system as well as allow for alterations to the data stored in the system (also known as the Indexing Service)
This separation allows each node to only care about what it is best at. By separating Compute and Realtime, we separate the memory concerns of listening on a real-time stream of data and processing it for entry into the system. By separating the Master and Broker, we separate the needs for querying from the needs for maintaining "good" data distribution across the cluster.
All nodes can be run in some highly available fashion. Either as symmetric peers in a share-nothing cluster or as hot-swap failover nodes.
@ -39,7 +41,7 @@ Aside from these nodes, there are 3 external dependencies to the system:
1. A running [ZooKeeper](http://zookeeper.apache.org/) cluster for cluster service discovery and maintenance of current data topology
2. A MySQL instance for maintenance of metadata about the data segments that should be served by the system
3. A “deep storage” LOB store/file system to hold the stored segments
3. A "deep storage" LOB store/file system to hold the stored segments
### Data Storage
@ -53,9 +55,9 @@ Getting data into the Druid system requires an indexing process. This gives the
- Bitmap compression
- RLE (on the roadmap, but not yet implemented)
The output of the indexing process is stored in a “deep storage” LOB store/file system ([Deep Storage](Deep Storage.html) for information about potential options). Data is then loaded by compute nodes by first downloading the data to their local disk and then memory mapping it before serving queries.
The output of the indexing process is stored in a "deep storage" LOB store/file system (see [Deep Storage](Deep-Storage.html) for information about potential options). Data is then loaded by compute nodes by first downloading the data to their local disk and then memory mapping it before serving queries.
If a compute node dies, it will no longer serve its segments, but given that the segments are still available on the “deep storage” any other node can simply download the segment and start serving it. This means that it is possible to actually remove all compute nodes from the cluster and then re-provision them without any data loss. It also means that if the “deep storage” is not available, the nodes can continue to serve the segments they have already pulled down (i.e. the cluster goes stale, not down).
If a compute node dies, it will no longer serve its segments, but given that the segments are still available on the "deep storage" any other node can simply download the segment and start serving it. This means that it is possible to actually remove all compute nodes from the cluster and then re-provision them without any data loss. It also means that if the "deep storage" is not available, the nodes can continue to serve the segments they have already pulled down (i.e. the cluster goes stale, not down).
In order for a segment to exist inside of the cluster, an entry has to be added to a table in a MySQL instance. This entry is a self-describing bit of metadata about the segment, it includes things like the schema of the segment, the size, and the location on deep storage. These entries are what the Master uses to know what data **should** be available on the cluster.
@ -65,7 +67,7 @@ In order for a segment to exist inside of the cluster, an entry has to be added
- **Master** Can be run in a hot fail-over configuration. If no masters are running, then changes to the data topology will stop happening (no new data and no data balancing decisions), but the system will continue to run.
- **Broker** Can be run in parallel or in hot fail-over.
- **Realtime** Depending on the semantics of the delivery stream, multiple of these can be run in parallel processing the exact same stream. They periodically checkpoint to disk and eventually push out to the Computes. Steps are taken to be able to recover from process death, but loss of access to the local disk can result in data loss if this is the only method of adding data to the system.
- **“deep storage” file system** If this is not available, new data will not be able to enter the cluster, but the cluster will continue operating as is.
- **"deep storage" file system** If this is not available, new data will not be able to enter the cluster, but the cluster will continue operating as is.
- **MySQL** If this is not available, the master will be unable to find out about new segments in the system, but it will continue with its current view of the segments that should exist in the cluster.
- **ZooKeeper** If this is not available, data topology changes will not be able to be made, but the Brokers will maintain their most recent view of the data topology and continue serving requests accordingly.
@ -77,12 +79,8 @@ For filters at a more granular level than what the Broker can prune based on, th
Once it knows the rows that match the current query, it can access the columns it cares about for those rows directly without having to load data that it is just going to throw away.
The following diagram shows the data flow for queries without showing batch indexing:
![Simple Data Flow](https://raw.github.com/metamx/druid/master/doc/data_flow_simple.png "Simple Data Flow")
### In-memory?
Druid is not always and only in-memory. When we first built it, it is true that it was all in-memory all the time, but as time went on the price-performance tradeoff ended up making keeping all of our customers' data in memory all the time a non-starter. We then added the ability to memory map data and allow the OS to handle paging data in and out of memory on demand. Our production cluster is primarily configured to operate with this memory mapping behavior and we are definitely over-subscribed in terms of memory available vs. data a node is serving.
As you read some of the old blog posts or other literature about the project, you will see “in-memory” often touted as that is the history of where Druid came from, but the technical reality is that there is a spectrum of price vs. performance and being able to slide along it from all in-memory (high cost, great performance) to mostly on disk (low cost, low performance) is the important knob to be able to adjust.
As you read some of the old blog posts or other literature about the project, you will see "in-memory" often touted as that is the history of where Druid came from, but the technical reality is that there is a spectrum of price vs. performance and being able to slide along it from all in-memory (high cost, great performance) to mostly on disk (low cost, low performance) is the important knob to be able to adjust.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
A version may be declared as a release candidate if it has been deployed to a sizable production cluster. Release candidates are declared as stable after we feel fairly confident there are no major bugs in the version. Check out the [Versioning](Versioning.html) section for how we describe software versions.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
# Druid Personal Demo Cluster (DPDC)

View File

@ -1,8 +1,8 @@
---
layout: default
layout: doc_page
---
We are not experts on Cassandra, if anything is incorrect about our portrayal, please let us know on the mailing list or via some other means. We will fix this page.
Druid is highly optimized for scans and aggregations, it supports arbitrarily deep drill downs into data sets without the need to pre-compute, and it can ingest event streams in real-time and allow users to query events as they come in. Cassandra is a great key-value store and it has some features that allow you to use it to do more interesting things than what you can do with a pure key-value store. But, it is not built for the same use cases that Druid handles, namely regularly scanning over billions of entries per query.
Furthermore, Druid is fully read-consistent. Druid breaks down a data set into immutable chunks known as segments. All replicants always present the exact same view for the piece of data they are holding and we dont have to worry about data synchronization. The tradeoff is that Druid has limited semantics for write and update operations. Cassandra, similar to Amazons Dynamo, has an eventually consistent data model. Writes are always supported but updates to data may take some time before all replicas sync up (data reconciliation is done at read time). This model favors availability and scalability over consistency.
Furthermore, Druid is fully read-consistent. Druid breaks down a data set into immutable chunks known as segments. All replicants always present the exact same view for the piece of data they are holding and we don't have to worry about data synchronization. The tradeoff is that Druid has limited semantics for write and update operations. Cassandra, similar to Amazon's Dynamo, has an eventually consistent data model. Writes are always supported but updates to data may take some time before all replicas sync up (data reconciliation is done at read time). This model favors availability and scalability over consistency.

View File

@ -1,6 +1,6 @@
---
layout: default
layout: doc_page
---
Druid is a complementary addition to Hadoop. Hadoop is great at storing and making accessible large amounts of individually low-value data. Unfortunately, Hadoop is not great at providing query speed guarantees on top of that data, nor does it have very good operational characteristics for a customer-facing production system. Druid, on the other hand, excels at taking high-value summaries of the low-value data on Hadoop, making it available in a fast and always-on fashion, such that it could be exposed directly to a customer.
Druid also requires some infrastructure to exist for “deep storage”. HDFS is one of the implemented options for this “deep storage”.
Druid also requires some infrastructure to exist for [deep storage](Deep-Storage.html). HDFS is one of the implemented options for this [deep storage](Deep-Storage.html).

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
The question of Druid versus Impala or Shark basically comes down to your product requirements and what the systems were designed to do.
@ -42,4 +42,4 @@ Impala/Shark, being based on data in HDFS or some other backing store, are limit
Druid supports timeseries and groupBy style queries. It doesn't have support for joins, which makes it a lot less flexible for generic processing.
Impala/Shark support SQL style queries with full joins.
Impala/Shark support SQL style queries with full joins.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
###How does Druid compare to Redshift?
@ -19,7 +19,7 @@ Its write semantics arent as fluid and does not support joins. ParAccel is
###Data distribution model
Druids data distribution, is segment based which exists on highly available “deep” storage, like S3 or HDFS. Scaling up (or down) does not require massive copy actions or downtime; in fact, losing any number of compute nodes does not result in data loss because new compute nodes can always be brought up by reading data from “deep” storage.
Druid's data distribution is segment-based, and the segments exist on highly available "deep" storage, like S3 or HDFS. Scaling up (or down) does not require massive copy actions or downtime; in fact, losing any number of compute nodes does not result in data loss because new compute nodes can always be brought up by reading data from "deep" storage.
In contrast, ParAccel's data distribution model is hash-based. Expanding the cluster requires re-hashing the data across the nodes, making it difficult to perform without taking downtime. Amazon's Redshift works around this issue with a multi-step process:
@ -37,4 +37,4 @@ ParAccels hash-based distribution generally means that replication is conduct
Along with column oriented structures, Druid uses indexing structures to speed up query execution when a filter is provided. Indexing structures do increase storage overhead (and make it more difficult to allow for mutation), but they can also significantly speed up queries.
ParAccel does not appear to employ indexing strategies.
ParAccel does not appear to employ indexing strategies.

View File

@ -1,10 +1,10 @@
---
layout: default
layout: doc_page
---
How does Druid compare to Vertica?
Vertica is similar to ParAccel/Redshift ([Druid-vs-Redshift](Druid-vs-Redshift.html)) described above in that it wasn't built for real-time streaming data ingestion and it supports full SQL.
The other big difference is that instead of employing indexing, Vertica tries to optimize processing by leveraging run-length encoding (RLE) and other compression techniques along with a “projection” system that creates materialized copies of the data in a different sort order (to maximize the effectiveness of RLE).
The other big difference is that instead of employing indexing, Vertica tries to optimize processing by leveraging run-length encoding (RLE) and other compression techniques along with a "projection" system that creates materialized copies of the data in a different sort order (to maximize the effectiveness of RLE).
We are unclear about how Vertica handles data distribution and replication, so we cannot speak to if/how Druid is different.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Examples
========
@ -15,21 +15,22 @@ There are two options for installing standalone Druid. Building from source, and
Clone Druid and build it:
<code>git clone https://github.com/metamx/druid.git druid
cd druid
git fetch --tags
git checkout druid-0.4.30
./build.sh
</code>
``` bash
git clone https://github.com/metamx/druid.git druid
cd druid
git fetch --tags
git checkout druid-0.4.30
./build.sh
```
### Downloading the DSK (Druid Standalone Kit)
[Download](http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz) a stand-alone tarball and run it:
<code>
tar -xzf druid-services-0.X.X-SNAPSHOT-bin.tar.gz
cd druid-services-0.X.X-SNAPSHOT
</code>
``` bash
tar -xzf druid-services-0.X.X-SNAPSHOT-bin.tar.gz
cd druid-services-0.X.X-SNAPSHOT
```
Twitter Example
---------------
@ -39,12 +40,12 @@ For a full tutorial based on the twitter example, check out this [Twitter Tutori
This Example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/metamx/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
### What you'll learn
\* See how large amounts of data gets ingested into Druid in real-time
\* Learn how to do fast, interactive, analytics queries on that real-time data
* See how large amounts of data get ingested into Druid in real-time
* Learn how to do fast, interactive, analytics queries on that real-time data
### What you need
\* A build of standalone Druid with the Twitter example (see above)
\* A Twitter username and password.
* A build of standalone Druid with the Twitter example (see above)
* A Twitter username and password.
### What you'll do
@ -57,12 +58,15 @@ This uses `RandomFirehoseFactory` which emits a stream of random numbers (outCol
In a terminal window, (NOTE: If you are using the cloned Github repository these scripts are in ./examples/bin) start the server with:
`./run_example_server.sh`
`# type rand when prompted`
``` bash
./run_example_server.sh # type rand when prompted
```
In another terminal window:
`./run_example_client.sh`
`# type rand when prompted`
``` bash
./run_example_client.sh # type rand when prompted
```
The result of the client query is in JSON format. The client makes a REST request using the program `curl` which is usually installed on Linux, Unix, and OSX by default.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
A filter is a JSON object indicating which rows of data should be included in the computation for a query. It's essentially the equivalent of the WHERE clause in SQL. Druid supports the following types of filters.
@ -9,12 +9,9 @@ The simplest filter is a selector filter. The selector filter will match a speci
The grammar for a SELECTOR filter is as follows:
<code>"filter": {
"type": "selector",
"dimension": <dimension_string>,
"value": <dimension_value_string>
}
</code>
``` json
"filter": { "type": "selector", "dimension": <dimension_string>, "value": <dimension_value_string> }
```
This is the equivalent of `WHERE <dimension_string> = '<dimension_value_string>'`.
@ -22,12 +19,9 @@ This is the equivalent of `WHERE <dimension_string> = '<dimension_value_string>'
The regular expression filter is similar to the selector filter, but using regular expressions. It matches the specified dimension with the given pattern. The pattern can be any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html).
<code>"filter": {
"type": "regex",
"dimension": <dimension_string>,
"pattern": <pattern_string>
}
</code>
``` json
"filter": { "type": "regex", "dimension": <dimension_string>, "pattern": <pattern_string> }
```
### Logical expression filters
@ -35,11 +29,9 @@ The regular expression filter is similar to the selector filter, but using regul
The grammar for an AND filter is as follows:
<code>"filter": {
"type": "and",
"fields": [<filter>, <filter>, ...]
}
</code>
``` json
"filter": { "type": "and", "fields": [<filter>, <filter>, ...] }
```
The filters in fields can be any other filter defined on this page.
@ -47,11 +39,9 @@ The filters in fields can be any other filter defined on this page.
The grammar for an OR filter is as follows:
<code>"filter": {
"type": "or",
"fields": [<filter>, <filter>, ...]
}
</code>
``` json
"filter": { "type": "or", "fields": [<filter>, <filter>, ...] }
```
The filters in fields can be any other filter defined on this page.
@ -59,11 +49,9 @@ The filters in fields can be any other filter defined on this page.
The grammar for a NOT filter is as follows:
<code>"filter": {
"type": "not",
"field": <filter>
}
</code>
```json
"filter": { "type": "not", "field": <filter> }
```
The filter specified at field can be any other filter defined on this page.
@ -73,19 +61,21 @@ The JavaScript filter matches a dimension against the specified JavaScript funct
The function takes a single argument, the dimension value, and returns either true or false.
<code>{
"type" : "javascript",
"dimension" : <dimension_string>,
"function" : "function(value) { <...> }"
}
</code>
```json
{
"type" : "javascript",
"dimension" : <dimension_string>,
"function" : "function(value) { <...> }"
}
```
**Example**
The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`
<code>{
"type" : "javascript",
"dimension" : "name",
"function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
}
</code>
```json
{
"type" : "javascript",
"dimension" : "name",
"function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
}
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Firehoses describe the data stream source. They are pluggable and thus the configuration schema can and will vary based on the `type` of the firehose.
@ -11,7 +11,7 @@ We describe the configuration of the Kafka firehose from the example below, but
- `consumerProps` is a map of properties for the Kafka consumer. The JSON object is converted into a Properties object and passed along to the Kafka consumer.
- `feed` is the feed that the Kafka consumer should read from.
- `parser` represents a parser that knows how to convert from String representations into the required `InputRow` representation that Druid uses. This is a potentially reusable piece that can be found in many of the firehoses that are based on text streams. The spec in the example describes a JSON feed (new-line delimited objects), with a timestamp column called “timestamp” in ISO8601 format and that it should not include the dimension “value” when processing. More information about the options available for the parser are available [here](https://github.com/metamx/druid/wiki/Firehose#parsing-data).
- `parser` represents a parser that knows how to convert from String representations into the required `InputRow` representation that Druid uses. This is a potentially reusable piece that can be found in many of the firehoses that are based on text streams. The spec in the example describes a JSON feed (new-line delimited objects), with a timestamp column called "timestamp" in ISO8601 format and that it should not include the dimension "value" when processing. More information about the options available for the parser are available [here](https://github.com/metamx/druid/wiki/Firehose#parsing-data).
Available Firehoses
-------------------

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
The granularity field determines how data gets bucketed across the time dimension, i.e., how it gets aggregated by hour, day, minute, etc.
@ -10,8 +10,9 @@ It can be specified either as a string for simple granularities or as an object
Simple granularities are specified as a string and bucket timestamps by their UTC time (i.e. days start at 00:00 UTC).
Supported granularity strings are: `all`, `none`, `minute`, `fifteen_minute`, `thirty_minute`, `hour` and `day`
\* **`all`** buckets everything into a single bucket
\* **`none`** does not bucket data (it actually uses the granularity of the index - minimum here is `none` which means millisecond granularity). Using `none` in a [timeseries query|TimeSeriesQuery](timeseries query|TimeSeriesQuery.html) is currently not recommended (the system will try to generate 0 values for all milliseconds that didnt exist, which is often a lot).
* `all` buckets everything into a single bucket
* `none` does not bucket data (it actually uses the granularity of the index - minimum here is `none` which means millisecond granularity). Using `none` in a [TimeSeriesQuery](TimeSeriesQuery.html) is currently not recommended (the system will try to generate 0 values for all milliseconds that didn't exist, which is often a lot).
### Duration Granularities
@ -19,11 +20,15 @@ Duration granularities are specified as an exact duration in milliseconds and ti
They also support specifying an optional origin, which defines where to start counting time buckets from (defaults to 1970-01-01T00:00:00Z).
<code>{"type": "duration", "duration": "7200000"}</code>
```
{"type": "duration", "duration": "7200000"}
```
This chunks up every 2 hours.
<code>{"type": "duration", "duration": "3600000", "origin": "2012-01-01T00:30:00Z"}</code>
```
{"type": "duration", "duration": "3600000", "origin": "2012-01-01T00:30:00Z"}
```
This chunks up every hour on the half-hour.
@ -38,11 +43,15 @@ By default years start on the first of January, months start on the first of the
Time zone is optional (defaults to UTC)
Origin is optional (defaults to 1970-01-01T00:00:00 in the given time zone)
<code>{"type": "period", "period": "P2D", "timeZone": "America/Los_Angeles"}</code>
```
{"type": "period", "period": "P2D", "timeZone": "America/Los_Angeles"}
```
This will bucket by two day chunks in the Pacific timezone.
<code>{"type": "period", "period": "P3M", "timeZone": "America/Los_Angeles", "origin": "2012-02-01T00:00:00-08:00"}</code>
```
{"type": "period", "period": "P3M", "timeZone": "America/Los_Angeles", "origin": "2012-02-01T00:00:00-08:00"}
```
This will bucket by 3 month chunks in the Pacific timezone where the three-month quarters are defined as starting from February.

View File

@ -1,96 +1,53 @@
---
layout: default
layout: doc_page
---
These types of queries take a groupBy query object and return an array of JSON objects where each object represents a grouping asked for by the query.
An example groupBy query object is shown below:
<pre>
<code>
``` json
{
[queryType]() “groupBy”,
[dataSource]() “sample\_datasource”,
[granularity]() “day”,
[dimensions]() [“dim1”, “dim2”],
[limitSpec]() {
[type]() “default”,
[limit]() 5000,
[columns]() [“dim1”, “metric1”]
},
[filter]() {
[type]() “and”,
[fields]() [
{
[type]() “selector”,
[dimension]() “sample\_dimension1”,
[value]() “sample\_value1”
},
{
[type]() “or”,
[fields]() [
{
[type]() “selector”,
[dimension]() “sample\_dimension2”,
[value]() “sample\_value2”
},
{
[type]() “selector”,
[dimension]() “sample\_dimension3”,
[value]() “sample\_value3”
}
]
}
]
},
[aggregations]() [
{
[type]() “longSum”,
[name]() “sample\_name1”,
[fieldName]() “sample\_fieldName1”
},
{
[type]() “doubleSum”,
[name]() “sample\_name2”,
[fieldName]() “sample\_fieldName2”
}
],
[postAggregations]() [
{
[type]() “arithmetic”,
[name]() “sample\_divide”,
[fn]() “/”,
[fields]() [
{
[type]() “fieldAccess”,
[name]() “sample\_name1”,
[fieldName]() “sample\_fieldName1”
},
{
[type]() “fieldAccess”,
[name]() “sample\_name2”,
[fieldName]() “sample\_fieldName2”
}
]
}
],
[intervals]() [
“2012-01-01T00:00:00.000/2012-01-03T00:00:00.000”
],
[having]() {
[type]() “greaterThan”,
[aggregation]() “sample\_name1”,
[value]() 0
}
"queryType": "groupBy",
"dataSource": "sample_datasource",
"granularity": "day",
"dimensions": ["dim1", "dim2"],
"limitSpec": { "type": "default", "limit": 5000, "columns": ["dim1", "metric1"] },
"filter": {
"type": "and",
"fields": [
{ "type": "selector", "dimension": "sample_dimension1", "value": "sample_value1" },
{ "type": "or",
"fields": [
{ "type": "selector", "dimension": "sample_dimension2", "value": "sample_value2" },
{ "type": "selector", "dimension": "sample_dimension3", "value": "sample_value3" }
]
}
]
},
"aggregations": [
{ "type": "longSum", "name": "sample_name1", "fieldName": "sample_fieldName1" },
{ "type": "doubleSum", "name": "sample_name2", "fieldName": "sample_fieldName2" }
],
"postAggregations": [
{ "type": "arithmetic",
"name": "sample_divide",
"fn": "/",
"fields": [
{ "type": "fieldAccess", "name": "sample_name1", "fieldName": "sample_fieldName1" },
{ "type": "fieldAccess", "name": "sample_name2", "fieldName": "sample_fieldName2" }
]
}
],
"intervals": [ "2012-01-01T00:00:00.000/2012-01-03T00:00:00.000" ],
"having": { "type": "greaterThan", "aggregation": "sample_name1", "value": 0 }
}
</pre>
</code>
```
There are 9 main parts to a groupBy query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be “groupBy”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|queryType|This String should always be "groupBy"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|dimensions|A JSON list of dimensions to do the groupBy over|yes|
|orderBy|See [OrderBy](OrderBy.html).|no|
@ -102,33 +59,32 @@ There are 9 main parts to a groupBy query:
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|
To pull it all together, the above query would return *n\*m* data points, up to a maximum of 5000 points, where n is the cardinality of the “dim1” dimension, m is the cardinality of the “dim2” dimension, each day between 2012-01-01 and 2012-01-03, from the “sample\_datasource” table. Each data point contains the (long) sum of sample\_fieldName1 if the value of the data point is greater than 0, the (double) sum of sample\_fieldName2 and the (double) the result of sample\_fieldName1 divided by sample\_fieldName2 for the filter set for a particular grouping of “dim1” and “dim2”. The output looks like this:
To pull it all together, the above query would return *n\*m* data points, up to a maximum of 5000 points, where n is the cardinality of the "dim1" dimension, m is the cardinality of the "dim2" dimension, each day between 2012-01-01 and 2012-01-03, from the "sample_datasource" table. Each data point contains the (long) sum of sample_fieldName1 if the value of the data point is greater than 0, the (double) sum of sample_fieldName2 and the (double) result of sample_fieldName1 divided by sample_fieldName2 for the filter set for a particular grouping of "dim1" and "dim2". The output looks like this:
<pre>
<code>
[ {
“version” : “v1”,
“timestamp” : “2012-01-01T00:00:00.000Z”,
“event” : {
“dim1” : <some_dim1_value>,
“dim2” : <some_dim2_value>,
“sample\_name1” : <some_sample_name1_value>,
“sample\_name2” :<some_sample_name2_value>,
“sample\_divide” : <some_sample_divide_value>
}
}, {
“version” : “v1”,
“timestamp” : “2012-01-01T00:00:00.000Z”,
“event” : {
“dim1” : <some_other_dim1_value>,
“dim2” : <some_other_dim2_value>,
“sample\_name1” : <some_other_sample_name1_value>,
“sample\_name2” :<some_other_sample_name2_value>,
“sample\_divide” : <some_other_sample_divide_value>
}
},
```json
[
{
"version" : "v1",
"timestamp" : "2012-01-01T00:00:00.000Z",
"event" : {
"dim1" : <some_dim_value_one>,
"dim2" : <some_dim_value_two>,
"sample_name1" : <some_sample_name_value_one>,
"sample_name2" :<some_sample_name_value_two>,
"sample_divide" : <some_sample_divide_value>
}
},
{
"version" : "v1",
"timestamp" : "2012-01-01T00:00:00.000Z",
"event" : {
"dim1" : <some_other_dim_value_one>,
"dim2" : <some_other_dim_value_two>,
"sample_name1" : <some_other_sample_name_value_one>,
"sample_name2" :<some_other_sample_name_value_two>,
"sample_divide" : <some_other_sample_divide_value>
}
},
...
]
</pre>
</code>
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
A having clause is a JSON object identifying which rows from a groupBy query should be returned, by specifying conditions on aggregated values.
@ -17,12 +17,13 @@ Numeric filters can be used as the base filters for more complex boolean express
The equalTo filter will match rows with a specific aggregate value.
The grammar for an `equalTo` filter is as follows:
<code>"having": {
"type": "equalTo",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
</code>
```json
{
"type": "equalTo",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
```
This is the equivalent of `HAVING <aggregate> = <value>`.
@ -31,12 +32,13 @@ This is the equivalent of `HAVING <aggregate> = <value>`.
The greaterThan filter will match rows with aggregate values greater than the given value.
The grammar for a `greaterThan` filter is as follows:
<code>"having": {
"type": "greaterThan",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
</code>
```json
{
"type": "greaterThan",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
```
This is the equivalent of `HAVING <aggregate> > <value>`.
@ -45,12 +47,13 @@ This is the equivalent of `HAVING <aggregate> > <value>`.
The lessThan filter will match rows with aggregate values less than the specified value.
The grammar for a `lessThan` filter is as follows:
<code>"having": {
"type": "lessThan",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
</code>
```json
{
"type": "lessThan",
"aggregation": <aggregate_metric>,
"value": <numeric_value>
}
```
This is the equivalent of `HAVING <aggregate> < <value>`.
@ -60,11 +63,12 @@ This is the equivalent of `HAVING <aggregate> < <value>`.
The grammar for an AND filter is as follows:
<code>"having": {
"type": "and",
"havingSpecs": [<having clause>, <having clause>, ...]
}
</code>
```json
{
"type": "and",
"havingSpecs": [<having clause>, <having clause>, ...]
}
```
The having clauses in `havingSpecs` can be any other having clause defined on this page.
@ -72,11 +76,12 @@ The having clauses in `havingSpecs` can be any other having clause defined on th
The grammar for an OR filter is as follows:
<code>"having": {
"type": "or",
"havingSpecs": [<having clause>, <having clause>, ...]
}
</code>
```json
{
"type": "or",
"havingSpecs": [<having clause>, <having clause>, ...]
}
```
The having clauses in `havingSpecs` can be any other having clause defined on this page.
@ -84,10 +89,11 @@ The having clauses in `havingSpecs` can be any other having clause defined on th
The grammar for a NOT filter is as follows:
<code>"having": {
"type": "not",
"havingSpec": <having clause>
}
</code>
```json
{
"type": "not",
"havingSpec": <having clause>
}
```
The having clause specified at `havingSpec` can be any other having clause defined on this page.

View File

@ -1,10 +1,11 @@
---
layout: default
layout: doc_page
---
Druid is an open-source analytics datastore designed for realtime, exploratory, queries on large-scale data sets (100s of Billions entries, 100s TB data). Druid provides for cost effective, always-on, realtime data ingestion and arbitrary data exploration.
- Check out some [Examples](Examples.html)
- Try out Druid with our Getting Started [Tutorial](https://github.com/metamx/druid/wiki/Tutorial%3A-A-First-Look-at-Druid)
- Try out Druid with our Getting Started [Tutorial](./Tutorial%3A-A-First-Look-at-Druid.html)
- Learn more by reading the [White Paper](http://static.druid.io/docs/druid.pdf)
Why Druid?
@ -24,25 +25,25 @@ We have more details about the general design of the system and why you might wa
The data store world is vast, confusing and constantly in flux. This page is meant to help potential evaluators decide whether Druid is a good fit for the problem one needs to solve. If anything about it is incorrect please provide that feedback on the mailing list or via some other means, we will fix this page.
#### When Druid?
\* You need to do interactive, fast, exploration of large amounts of data
\* You need analytics (not key value store)
\* You have a lot of data (10s of Billions of events added per day, 10s of TB of data added per day)
\* You want to do your analysis on data as its happening (realtime)
\* Your store needs to be always-on, 24x7x365 and years into the future.
* You need to do interactive, fast, exploration of large amounts of data
* You need analytics (not key value store)
* You have a lot of data (10s of Billions of events added per day, 10s of TB of data added per day)
* You want to do your analysis on data as it's happening (realtime)
* Your store needs to be always-on, 24x7x365 and years into the future.
#### Not Druid?
\* The amount of data you have can easily be handled by MySql
\* Your querying for individual entries or doing lookups (Not Analytics)
\* Batch is good enough
\* Canned queries is good enough
\* Downtime is no big deal
* The amount of data you have can easily be handled by MySql
* You're querying for individual entries or doing lookups (Not Analytics)
* Batch is good enough
* Canned queries are good enough
* Downtime is no big deal
#### Druid vs…
\* [Druid-vs-Impala-or-Shark](Druid-vs-Impala-or-Shark.html)
\* [Druid-vs-Redshift](Druid-vs-Redshift.html)
\* [Druid-vs-Vertica](Druid-vs-Vertica.html)
\* [Druid-vs-Cassandra](Druid-vs-Cassandra.html)
\* [Druid-vs-Hadoop](Druid-vs-Hadoop.html)
* [Druid-vs-Impala-or-Shark](Druid-vs-Impala-or-Shark.html)
* [Druid-vs-Redshift](Druid-vs-Redshift.html)
* [Druid-vs-Vertica](Druid-vs-Vertica.html)
* [Druid-vs-Cassandra](Druid-vs-Cassandra.html)
* [Druid-vs-Hadoop](Druid-vs-Hadoop.html)
Key Features
------------

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Disclaimer: We are still in the process of finalizing the indexing service and these configs are prone to change at any time. We will announce when we feel the indexing service and the configurations described are stable.
@ -21,27 +21,37 @@ The indexer coordinator node exposes HTTP endpoints where tasks can be submitted
Tasks can be submitted via POST requests to:
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task
```
Tasks can be cancelled via POST requests to:
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/shutdown
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/shutdown
```
Issuing the cancel request once sends a graceful shutdown request. Graceful shutdowns may not stop a task right away, but instead issue a safe stop command at a point deemed least impactful to the system. Issuing the cancel request twice in succession will `kill -9` the task.
Task statuses can be retrieved via GET requests to:
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/status
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/status
```
Task segments can be retrieved via GET requests to:
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/segments
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/segments
```
When a task is submitted, the coordinator creates a lock over the data source and interval of the task. The coordinator also stores the task in a MySQL database table. The database table is read at startup time to bootstrap any tasks that may have been submitted to the coordinator but may not yet have been executed.
The coordinator also exposes a simple UI to show what tasks are currently running on what nodes at
http://<COORDINATOR_IP>:<port>/static/console.html
```
http://<COORDINATOR_IP>:<port>/static/console.html
```
#### Task Execution
@ -55,31 +65,34 @@ The Autoscaling mechanisms currently in place are tightly coupled with our deplo
The Coordinator node controls the number of workers in the cluster according to a worker setup spec that is submitted via a POST request to the indexer at:
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
```
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
```
A sample worker setup spec is shown below:
<code>{
"minVersion":"some_version",
"minNumWorkers":"0",
"maxNumWorkers":"10",
"nodeData": {
"type":"ec2",
"amiId":"ami-someId",
"instanceType":"m1.xlarge",
"minInstances":"1",
"maxInstances":"1",
"securityGroupIds":["securityGroupIds"],
"keyName":"keyName"
},
"userData":{
"classType":"galaxy",
"env":"druid",
"version":"druid_version",
"type":"sample_cluster/worker"
}
}
</code>
```
{
"minVersion":"some_version",
"minNumWorkers":"0",
"maxNumWorkers":"10",
"nodeData": {
"type":"ec2",
"amiId":"ami-someId",
"instanceType":"m1.xlarge",
"minInstances":"1",
"maxInstances":"1",
"securityGroupIds":["securityGroupIds"],
"keyName":"keyName"
},
"userData":{
"classType":"galaxy",
"env":"druid",
"version":"druid_version",
"type":"sample_cluster/worker"
}
}
```
Issuing a GET request at the same URL will return the worker setup spec that is currently in place. The worker setup spec listed above is just a sample and it is possible to write worker setup specs for other deployment environments. A description of the worker setup spec is shown below.
@ -101,19 +114,21 @@ Indexer Coordinator nodes can be run using the `com.metamx.druid.indexing.coordi
Indexer Coordinator nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
-Ddruid.zk.paths.indexer.announcementsPath=/druid/indexer/announcements
-Ddruid.zk.paths.indexer.leaderLatchPath=/druid/indexer/leaderLatchPath
-Ddruid.zk.paths.indexer.statusPath=/druid/indexer/status
-Ddruid.zk.paths.indexer.tasksPath=/druid/demo/indexer/tasks
```
-Ddruid.zk.paths.indexer.announcementsPath=/druid/indexer/announcements
-Ddruid.zk.paths.indexer.leaderLatchPath=/druid/indexer/leaderLatchPath
-Ddruid.zk.paths.indexer.statusPath=/druid/indexer/status
-Ddruid.zk.paths.indexer.tasksPath=/druid/demo/indexer/tasks
-Ddruid.indexer.runner=remote
-Ddruid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.configTable=sample_config
-Ddruid.indexer.workerSetupConfigName=worker_setup
-Ddruid.indexer.strategy=ec2
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
-Ddruid.indexer.logs.s3bucket=some_bucket
-Ddruid.indexer.logs.s3prefix=some_prefix
-Ddruid.indexer.runner=remote
-Ddruid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.configTable=sample_config
-Ddruid.indexer.workerSetupConfigName=worker_setup
-Ddruid.indexer.strategy=ec2
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
-Ddruid.indexer.logs.s3bucket=some_bucket
-Ddruid.indexer.logs.s3prefix=some_prefix
```
The indexing service requires some additional Zookeeper configs.
@ -128,7 +143,7 @@ Theres several additional configs that are required to run tasks.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.indexer.runner`|Indicates whether tasks should be run locally or in a distributed environment. “local” or “remote”.|local|
|`druid.indexer.runner`|Indicates whether tasks should be run locally or in a distributed environment. "local" or "remote".|local|
|`druid.indexer.taskDir`|Intermediate temporary directory that tasks may use.|none|
|`druid.indexer.configTable`|The MySQL config table where misc configs live.|none|
|`druid.indexer.strategy`|The autoscaling strategy to use.|noop|
@ -140,7 +155,9 @@ Theres several additional configs that are required to run tasks.
The indexer console can be used to view pending tasks, running tasks, available workers, and recent worker creation and termination. The console can be accessed at:
http://<COORDINATOR_IP>:8080/static/console.html
```
http://<COORDINATOR_IP>:8080/static/console.html
```
Worker Node
-----------
@ -155,29 +172,31 @@ Worker nodes can be run using the `com.metamx.druid.indexing.worker.http.WorkerM
Worker nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
-Ddruid.worker.version=0
-Ddruid.worker.capacity=3
```
-Ddruid.worker.version=0
-Ddruid.worker.capacity=3
-Ddruid.indexer.threads=3
-Ddruid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
-Ddruid.indexer.threads=3
-Ddruid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
-Ddruid.worker.masterService=druid:sample_cluster:indexer
-Ddruid.worker.masterService=druid:sample_cluster:indexer
-Ddruid.indexer.fork.hostpattern=<IP>:%d
-Ddruid.indexer.fork.startport=8080
-Ddruid.indexer.fork.main=com.metamx.druid.indexing.worker.executor.ExecutorMain
-Ddruid.indexer.fork.opts="-server -Xmx1g -Xms1g -XX:NewSize=256m -XX:MaxNewSize=256m"
-Ddruid.indexer.fork.property.druid.service=druid/sample_cluster/executor
-Ddruid.indexer.fork.hostpattern=<IP>:%d
-Ddruid.indexer.fork.startport=8080
-Ddruid.indexer.fork.main=com.metamx.druid.indexing.worker.executor.ExecutorMain
-Ddruid.indexer.fork.opts="-server -Xmx1g -Xms1g -XX:NewSize=256m -XX:MaxNewSize=256m"
-Ddruid.indexer.fork.property.druid.service=druid/sample_cluster/executor
# These configs are the same configs you would set for basic service configuration, just with a different prefix
-Ddruid.indexer.fork.property.druid.monitoring.monitorSystem=false
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
-Ddruid.indexer.fork.property.druid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.fork.property.druid.processing.formatString=processing-%s
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.server.maxSize=0
-Ddruid.indexer.fork.property.druid.request.logging.dir=request_logs/
# These configs are the same configs you would set for basic service configuration, just with a different prefix
-Ddruid.indexer.fork.property.druid.monitoring.monitorSystem=false
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
-Ddruid.indexer.fork.property.druid.indexer.taskDir=/mnt/persistent/task/
-Ddruid.indexer.fork.property.druid.processing.formatString=processing-%s
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.server.maxSize=0
-Ddruid.indexer.fork.property.druid.request.logging.dir=request_logs/
```
Many of the configurations for workers are similar to those for [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration), but with a different config prefix. Below we describe the unique worker configs.

View File

@ -1,9 +1,6 @@
---
layout: default
layout: doc_page
---
### R
- [RDruid](https://github.com/metamx/RDruid) - Druid connector for R
Community Libraries
-------------------
@ -11,13 +8,18 @@ Community Libraries
Some great folks have written their own libraries to interact with Druid
#### Ruby
\* [madvertise/ruby-druid](https://github.com/madvertise/ruby-druid) - A ruby client for Druid
* [madvertise/ruby-druid](https://github.com/madvertise/ruby-druid) - A ruby client for Druid
#### Python
\* [metamx/pydruid](https://github.com/metamx/pydruid) - A python client for Druid
* [metamx/pydruid](https://github.com/metamx/pydruid) - A python client for Druid
#### R
- [RDruid](https://github.com/metamx/RDruid) - Druid connector for R
#### Helper Libraries
- [madvertise/druid-dumbo](https://github.com/madvertise/druid-dumbo) - Scripts to help generate batch configs for the ingestion of data into Druid
- [housejester/druid-test-harness](https://github.com/housejester/druid-test-harness) - A set of scripts to simplify standing up some servers and seeing how things work
* [madvertise/druid-dumbo](https://github.com/madvertise/druid-dumbo) - Scripts to help generate batch configs for the ingestion of data into Druid
* [housejester/druid-test-harness](https://github.com/housejester/druid-test-harness) - A set of scripts to simplify standing up some servers and seeing how things work

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Once you have a realtime node working, it is time to load your own data to see how Druid performs.
@ -18,7 +18,7 @@ mkdir config/broker
## Loading Data with Kafka ##
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/master/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in realtime without writing any code. To load data to a realtime node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in realtime without writing any code. To load data to a realtime node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
### Booting Kafka ###
@ -58,7 +58,7 @@ Instructions for booting a Zookeeper and then Kafka cluster are available [here]
1. Create a valid configuration file similar to this called config/realtime/runtime.properties:
```
```properties
druid.host=0.0.0.0:8080
druid.port=8080
@ -91,7 +91,6 @@ Instructions for booting a Zookeeper and then Kafka cluster are available [here]
druid.database.password=diurd
druid.database.connectURI=
druid.host=127.0.0.1:8080
```
2. Create a valid realtime configuration file similar to this called realtime.spec:
@ -239,7 +238,7 @@ If you've already setup a realtime node, be aware that although you can run mult
1. Setup a configuration file called config/master/runtime.properties similar to:
```bash
```properties
druid.host=0.0.0.0:8081
druid.port=8081
@ -294,7 +293,7 @@ If you've already setup a realtime node, be aware that although you can run mult
1. Create a configuration file in config/compute/runtime.properties similar to:
```bash
```properties
druid.host=0.0.0.0:8082
druid.port=8082
@ -404,7 +403,9 @@ Now its time to run the Hadoop [Batch-ingestion](Batch-ingestion.html) job, Hado
2. Now run the job, with the config pointing at batchConfig.json:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Ddruid.realtime.specFile=realtime.spec -classpath lib/* com.metamx.druid.indexer.HadoopDruidIndexerMain batchConfig.json
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-Ddruid.realtime.specFile=realtime.spec -classpath lib/* \
com.metamx.druid.indexer.HadoopDruidIndexerMain batchConfig.json
```
You can now move on to [Querying Your Data](Querying-Your-Data.html)!

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Master
======
@ -39,37 +39,67 @@ The master node exposes several HTTP endpoints for interactions.
### GET
/info/master - returns the current true master of the cluster as a JSON object. E.g. A GET request to <IP>:8080/info/master will yield JSON of the form {[host]("IP"})
* `/info/master`
/info/cluster - returns JSON data about every node and segment in the cluster. E.g. A GET request to <IP>:8080/info/cluster will yield JSON data organized by nodes. Information about each node and each segment on each node will be returned.
Returns the current true master of the cluster as a JSON object.
/info/servers (optional param ?full) - returns all segments in the cluster if the full flag is not set, otherwise returns full metadata about all servers in the cluster
* `/info/cluster`
/info/servers/{serverName} - returns full metadata about a specific server
Returns JSON data about every node and segment in the cluster. Information about each node and each segment on each node will be returned.
/info/servers/{serverName}/segments (optional param ?full) - returns a list of all segments for a server if the full flag is not set, otherwise returns all segment metadata
* `/info/servers`
/info/servers/{serverName}/segments/{segmentId} - returns full metadata for a specific segment
Returns information about servers in the cluster. Set the `?full` query parameter to get full metadata about all servers and their segments in the cluster.
/info/segments (optional param ?full)- returns all segments in the cluster as a list if the full flag is not set, otherwise returns all metadata about segments in the cluster
* `/info/servers/{serverName}`
/info/segments/{segmentId} - returns full metadata for a specific segment
Returns full metadata about a specific server.
/info/datasources (optional param ?full) - returns a list of datasources in the cluster if the full flag is not set, otherwise returns all the metadata for every datasource in the cluster
* `/info/servers/{serverName}/segments`
/info/datasources/{dataSourceName} - returns full metadata for a datasource
Returns a list of all segments for a server. Set the `?full` query parameter to include all segment metadata.
/info/datasources/{dataSourceName}/segments (optional param ?full) - returns a list of all segments for a datasource if the full flag is not set, otherwise returns full segment metadata for a datasource
* `/info/servers/{serverName}/segments/{segmentId}`
/info/datasources/{dataSourceName}/segments/{segmentId} - returns full segment metadata for a specific segment
Returns full metadata for a specific segment.
/info/rules - returns all rules for all data sources in the cluster including the default datasource.
* `/info/segments`
/info/rules/{dataSourceName} - returns all rules for a specified datasource
Returns all segments in the cluster as a list. Set the `?full` flag to get all metadata about segments in the cluster
* `/info/segments/{segmentId}`
Returns full metadata for a specific segment
* `/info/datasources`
Returns a list of datasources in the cluster. Set the `?full` flag to get all metadata for every datasource in the cluster
* `/info/datasources/{dataSourceName}`
Returns full metadata for a datasource
* `/info/datasources/{dataSourceName}/segments`
Returns a list of all segments for a datasource. Set the `?full` flag to get full segment metadata for a datasource
* `/info/datasources/{dataSourceName}/segments/{segmentId}`
Returns full segment metadata for a specific segment
* `/info/rules`
Returns all rules for all data sources in the cluster including the default datasource.
* `/info/rules/{dataSourceName}`
Returns all rules for a specified datasource
### POST
/info/rules/{dataSourceName} - POST with a list of rules in JSON form to update rules.
* `/info/rules/{dataSourceName}`
POST with a list of rules in JSON form to update rules.
The Master Console
------------------
@ -83,17 +113,17 @@ FAQ
1. **Do clients ever contact the master node?**
The master is not involved in the lifecycle of a query.
The master is not involved in a query.
Compute nodes never directly contact the master node. The Druid master tells the compute nodes to load/drop data via Zookeeper, but the compute nodes are completely unaware of the master.
Compute nodes never directly contact the master node. The Druid master tells the compute nodes to load/drop data via Zookeeper, but the compute nodes are completely unaware of the master.
Brokers also never contact the master. Brokers base their understanding of the data topology on metadata exposed by the compute nodes via ZK and are completely unaware of the master.
Brokers also never contact the master. Brokers base their understanding of the data topology on metadata exposed by the compute nodes via ZK and are completely unaware of the master.
2. **Does it matter if the master node starts up before or after other processes?**
No. If the Druid master is not started up, no new segments will be loaded in the cluster and outdated segments will not be dropped. However, the master node can be started up at any time, and after a configurable delay, will start running master tasks.
No. If the Druid master is not started up, no new segments will be loaded in the cluster and outdated segments will not be dropped. However, the master node can be started up at any time, and after a configurable delay, will start running master tasks.
This also means that if you have a working cluster and all of your masters die, the cluster will continue to function, it just wont experience any changes to its data topology.
This also means that if you have a working cluster and all of your masters die, the cluster will continue to function; it just won't experience any changes to its data topology.
Running
-------

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
MySQL is an external dependency of Druid. We use it to store various metadata about the system, but not to store the actual data. There are a number of tables used for various purposes described below.
@ -10,24 +10,26 @@ This is dictated by the `druid.database.segmentTable` property (Note that these
This table stores metadata about the segments that are available in the system. The table is polled by the [Master](Master.html) to determine the set of segments that should be available for querying in the system. The table has two main functional columns; the other columns are for indexing purposes.
The `used` column is a boolean “tombstone”. A 1 means that the segment should be “used” by the cluster (i.e. it should be loaded and available for requests). A 0 means that the segment should not be actively loaded into the cluster. We do this as a means of removing segments from the cluster without actually removing their metadata (which allows for simpler rolling back if that is ever an issue).
The `used` column is a boolean "tombstone". A 1 means that the segment should be "used" by the cluster (i.e. it should be loaded and available for requests). A 0 means that the segment should not be actively loaded into the cluster. We do this as a means of removing segments from the cluster without actually removing their metadata (which allows for simpler rolling back if that is ever an issue).
The `payload` column stores a JSON blob that has all of the metadata for the segment (some of the data stored in this payload is redundant with some of the columns in the table, that is intentional). This looks something like
{
"dataSource":"wikipedia",
"interval":"2012-05-23T00:00:00.000Z/2012-05-24T00:00:00.000Z",
"version":"2012-05-24T00:10:00.046Z",
"loadSpec":{"type":"s3_zip",
"bucket":"bucket_for_segment",
"key":"path/to/segment/on/s3"},
"dimensions":"comma-delimited-list-of-dimension-names",
"metrics":"comma-delimited-list-of-metric-names",
"shardSpec":{"type":"none"},
"binaryVersion":9,
"size":size_of_segment,
"identifier":"wikipedia_2012-05-23T00:00:00.000Z_2012-05-24T00:00:00.000Z_2012-05-23T00:10:00.046Z"
}
```
{
"dataSource":"wikipedia",
"interval":"2012-05-23T00:00:00.000Z/2012-05-24T00:00:00.000Z",
"version":"2012-05-24T00:10:00.046Z",
"loadSpec":{"type":"s3_zip",
"bucket":"bucket_for_segment",
"key":"path/to/segment/on/s3"},
"dimensions":"comma-delimited-list-of-dimension-names",
"metrics":"comma-delimited-list-of-metric-names",
"shardSpec":{"type":"none"},
"binaryVersion":9,
"size":size_of_segment,
"identifier":"wikipedia_2012-05-23T00:00:00.000Z_2012-05-24T00:00:00.000Z_2012-05-23T00:10:00.046Z"
}
```
Note that the format of this blob can and will change from time-to-time.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
The orderBy field provides the functionality to sort and limit the set of results from a groupBy query. Available options are:
@ -7,21 +7,21 @@ The orderBy field provides the functionality to sort and limit the set of result
The default limit spec takes a limit and the list of columns to do an orderBy operation over. The grammar is:
<code>
{
"type" : "default",
"limit" : <integer_value>,
"columns" : [list of OrderByColumnSpec],
}
</code>
```json
{
"type" : "default",
"limit" : <integer_value>,
"columns" : [list of OrderByColumnSpec],
}
```
#### OrderByColumnSpec
OrderByColumnSpecs indicate how to do order by operations. Each order by condition can be a <code>String</code> or a map of the following form:
OrderByColumnSpecs indicate how to do order by operations. Each order by condition can be a `jsonString` or a map of the following form:
<code>
{
"dimension" : "<Any dimension or metric>",
"direction" : "ASCENDING OR DESCENDING"
}
</code>
```json
{
"dimension" : <Any dimension or metric>,
"direction" : "ASCENDING OR DESCENDING"
}
```

View File

@ -1,7 +1,7 @@
---
layout: default
layout: doc_page
---
The Plumber is the thing that handles generated segments both while they are being generated and when they are “done”. This is also technically a pluggable interface and there are multiple implementations, but there are a lot of details handled by the plumber such that it is expected that there will only be a few implementations and only more advanced third-parties will implement their own. See [here](https://github.com/metamx/druid/wiki/Plumber#available-plumbers) for a description of the plumbers included with Druid.
The Plumber is the thing that handles generated segments both while they are being generated and when they are "done". This is also technically a pluggable interface and there are multiple implementations, but there are a lot of details handled by the plumber such that it is expected that there will only be a few implementations and only more advanced third-parties will implement their own.
|Field|Type|Description|Required|
|-----|----|-----------|--------|
@ -9,8 +9,8 @@ The Plumber is the thing that handles generated segments both while they are bei
We provide a brief description of the example to exemplify the types of things that are configured on the plumber.
- `windowPeriod` is the amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.
- `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.
* `windowPeriod` is the amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event with a timestamp more than 10 minutes old will be thrown away and not included in the segment generated by the realtime server.
* `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists.
Available Plumbers
------------------

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Post-aggregations are specifications of processing that should happen on aggregated values as they come out of Druid. If you include a post aggregation as part of a query, make sure to include all aggregators the post-aggregator requires.
@ -13,83 +13,63 @@ Supported functions are `+`, `-`, `*`, and `/`
The grammar for an arithmetic post aggregation is:
<code>postAggregation : {
"type" : "arithmetic",
"name" : <output_name>,
"fn" : <arithmetic_function>,
"fields": [<post_aggregator>, <post_aggregator>, ...]
}</code>
```json
postAggregation : {
"type" : "arithmetic",
"name" : <output_name>,
"fn" : <arithmetic_function>,
"fields": [<post_aggregator>, <post_aggregator>, ...]
}
```
### Field accessor post-aggregator
This returns the value produced by the specified [aggregator|Aggregations](aggregator|Aggregations.html).
This returns the value produced by the specified [aggregator](Aggregations.html).
`fieldName` refers to the output name of the aggregator given in the [aggregations|Aggregations](aggregations|Aggregations.html) portion of the query.
`fieldName` refers to the output name of the aggregator given in the [aggregations](Aggregations.html) portion of the query.
<code>field_accessor : {
"type" : "fieldAccess",
"fieldName" : <aggregator_name>
}</code>
```json
{ "type" : "fieldAccess", "fieldName" : <aggregator_name> }
```
### Constant post-aggregator
The constant post-aggregator always returns the specified value.
<code>constant : {
"type" : "constant",
"name" : <output_name>,
"value" : <numerical_value>,
}</code>
```json
{ "type" : "constant", "name" : <output_name>, "value" : <numerical_value> }
```
### Example Usage
In this example, lets calculate a simple percentage using post aggregators. Lets imagine our data set has a metric called “total”.
In this example, let's calculate a simple percentage using post aggregators. Let's imagine our data set has a metric called "total".
The format of the query JSON is as follows:
<code>
{
...
"aggregations" : [
{
"type" : "count",
"name" : "rows"
},
{
"type" : "doubleSum",
"name" : "tot",
"fieldName" : "total"
}
],
"postAggregations" : {
"type" : "arithmetic",
"name" : "average",
"fn" : "*",
"fields" : [
{
"type" : "arithmetic",
"name" : "div",
"fn" : "/",
"fields" : [
{
"type" : "fieldAccess",
"name" : "tot",
"fieldName" : "tot"
},
{
"type" : "fieldAccess",
"name" : "rows",
"fieldName" : "rows"
}
]
},
{
"type" : "constant",
"name": "const",
"value" : 100
}
]
}
...
}
</code>
```json
{
...
"aggregations" : [
{ "type" : "count", "name" : "rows" },
{ "type" : "doubleSum", "name" : "tot", "fieldName" : "total" }
],
"postAggregations" : {
"type" : "arithmetic",
"name" : "average",
"fn" : "*",
"fields" : [
{ "type" : "arithmetic",
"name" : "div",
"fn" : "/",
"fields" : [
{ "type" : "fieldAccess", "name" : "tot", "fieldName" : "tot" },
{ "type" : "fieldAccess", "name" : "rows", "fieldName" : "rows" }
]
},
{ "type" : "constant", "name": "const", "value" : 100 }
]
}
...
}
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
# Setup #
@ -8,93 +8,100 @@ Before we start querying druid, we're going to finish setting up a complete clus
## Booting a Broker Node ##
1. Setup a config file at config/broker/runtime.properties that looks like this:
```
druid.host=0.0.0.0:8083
druid.port=8083
```
druid.host=0.0.0.0:8083
druid.port=8083
com.metamx.emitter.logging=true
com.metamx.emitter.logging=true
druid.processing.formatString=processing_%s
druid.processing.numThreads=1
druid.processing.buffer.sizeBytes=10000000
druid.processing.formatString=processing_%s
druid.processing.numThreads=1
druid.processing.buffer.sizeBytes=10000000
#emitting, opaque marker
druid.service=example
#emitting, opaque marker
druid.service=example
druid.request.logging.dir=/tmp/example/log
druid.realtime.specFile=realtime.spec
com.metamx.emitter.logging=true
com.metamx.emitter.logging.level=debug
druid.request.logging.dir=/tmp/example/log
druid.realtime.specFile=realtime.spec
com.metamx.emitter.logging=true
com.metamx.emitter.logging.level=debug
# below are dummy values when operating a realtime only node
druid.processing.numThreads=3
# below are dummy values when operating a realtime only node
druid.processing.numThreads=3
com.metamx.aws.accessKey=dummy_access_key
com.metamx.aws.secretKey=dummy_secret_key
druid.pusher.s3.bucket=dummy_s3_bucket
com.metamx.aws.accessKey=dummy_access_key
com.metamx.aws.secretKey=dummy_secret_key
druid.pusher.s3.bucket=dummy_s3_bucket
druid.zk.service.host=localhost
druid.server.maxSize=300000000000
druid.zk.paths.base=/druid
druid.database.segmentTable=prod_segments
druid.database.user=druid
druid.database.password=diurd
druid.database.connectURI=jdbc:mysql://localhost:3306/druid
druid.zk.paths.discoveryPath=/druid/discoveryPath
druid.database.ruleTable=rules
druid.database.configTable=config
druid.zk.service.host=localhost
druid.server.maxSize=300000000000
druid.zk.paths.base=/druid
druid.database.segmentTable=prod_segments
druid.database.user=druid
druid.database.password=diurd
druid.database.connectURI=jdbc:mysql://localhost:3306/druid
druid.zk.paths.discoveryPath=/druid/discoveryPath
druid.database.ruleTable=rules
druid.database.configTable=config
# Path on local FS for storage of segments; dir will be created if needed
druid.paths.indexCache=/tmp/druid/indexCache
# Path on local FS for storage of segment metadata; dir will be created if needed
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
druid.pusher.local.storageDirectory=/tmp/druid/localStorage
druid.pusher.local=true
# Path on local FS for storage of segments; dir will be created if needed
druid.paths.indexCache=/tmp/druid/indexCache
# Path on local FS for storage of segment metadata; dir will be created if needed
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
druid.pusher.local.storageDirectory=/tmp/druid/localStorage
druid.pusher.local=true
# thread pool size for servicing queries
druid.client.http.connections=30
```
# thread pool size for servicing queries
druid.client.http.connections=30
```
2. Run the broker node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-Ddruid.realtime.specFile=realtime.spec \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/broker \
com.metamx.druid.http.BrokerMain
```
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-Ddruid.realtime.specFile=realtime.spec \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/broker \
com.metamx.druid.http.BrokerMain
```
## Booting a Master Node ##
1. Setup a config file at config/master/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818870](https://gist.github.com/rjurney/5818870)
2. Run the master node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/master \
com.metamx.druid.http.MasterMain
```
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/master \
com.metamx.druid.http.MasterMain
```
## Booting a Realtime Node ##
1. Setup a config file at config/realtime/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818774](https://gist.github.com/rjurney/5818774)
2. Setup a realtime.spec file like this: [https://gist.github.com/rjurney/5818779](https://gist.github.com/rjurney/5818779)
3. Run the realtime node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-Ddruid.realtime.specFile=realtime.spec \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/realtime \
com.metamx.druid.realtime.RealtimeMain
```
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-Ddruid.realtime.specFile=realtime.spec \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/realtime \
com.metamx.druid.realtime.RealtimeMain
```
## Booting a Compute Node ##
1. Setup a config file at config/compute/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818885](https://gist.github.com/rjurney/5818885)
2. Run the compute node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/compute \
com.metamx.druid.http.ComputeMain
```
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/compute \
com.metamx.druid.http.ComputeMain
```
# Querying Your Data #
@ -107,6 +114,7 @@ As a shared-nothing system, there are three ways to query druid, against the [Re
### Construct a Query ###
For constructing this query, see: Querying against the realtime.spec
```json
{
"queryType": "groupBy",
@ -125,57 +133,52 @@ For constructing this query, see: Querying against the realtime.spec
### Querying the Realtime Node ###
Run our query against port 8080:
```bash
curl -X POST "http://localhost:8080/druid/v2/?pretty" \
-H 'content-type: application/json' -d @query.body
curl -X POST "http://localhost:8080/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
```
See our result:
```json
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 5,
"wp" : 15000.0,
"rows" : 5
}
"event" : { "imps" : 5, "wp" : 15000.0, "rows" : 5 }
} ]
```
### Querying the Compute Node ###
Run the query against port 8082:
```bash
curl -X POST "http://localhost:8082/druid/v2/?pretty" \
-H 'content-type: application/json' -d @query.body
curl -X POST "http://localhost:8082/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
```
And get (similar to):
```json
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 27,
"wp" : 77000.0,
"rows" : 9
}
"event" : { "imps" : 27, "wp" : 77000.0, "rows" : 9 }
} ]
```
### Querying both Nodes via the Broker ###
Run the query against port 8083:
```bash
curl -X POST "http://localhost:8083/druid/v2/?pretty" \
-H 'content-type: application/json' -d @query.body
curl -X POST "http://localhost:8083/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
```
And get:
```json
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 5,
"wp" : 15000.0,
"rows" : 5
}
"event" : { "imps" : 5, "wp" : 15000.0, "rows" : 5 }
} ]
```
@ -189,9 +192,9 @@ How are we to know what queries we can run? Although [Querying](Querying.html) i
[{
"schema" : { "dataSource":"druidtest",
"aggregators":[ {"type":"count", "name":"impressions"},
{"type":"doubleSum","name":"wp","fieldName":"wp"}],
{"type":"doubleSum","name":"wp","fieldName":"wp"}],
"indexGranularity":"minute",
"shardSpec" : { "type": "none" } },
"shardSpec" : { "type": "none" } },
"config" : { "maxRowsInMemory" : 500000,
"intermediatePersistPeriod" : "PT10m" },
"firehose" : { "type" : "kafka-0.7.2",
@ -221,6 +224,7 @@ How are we to know what queries we can run? Although [Querying](Querying.html) i
```json
"dataSource":"druidtest"
```
Our dataSource tells us the name of the relation/table, or 'source of data', to query in both our realtime.spec and query.body!
### aggregations ###
@ -239,7 +243,7 @@ this matches up to the aggregators in the schema of our realtime.spec!
```json
"aggregators":[ {"type":"count", "name":"impressions"},
{"type":"doubleSum","name":"wp","fieldName":"wp"}],
{"type":"doubleSum","name":"wp","fieldName":"wp"}],
```
### dimensions ###
@ -277,48 +281,23 @@ Which gets us grouped data in return!
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 1,
"age" : "100",
"wp" : 1000.0,
"rows" : 1
}
"event" : { "imps" : 1, "age" : "100", "wp" : 1000.0, "rows" : 1 }
}, {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 1,
"age" : "20",
"wp" : 3000.0,
"rows" : 1
}
"event" : { "imps" : 1, "age" : "20", "wp" : 3000.0, "rows" : 1 }
}, {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 1,
"age" : "30",
"wp" : 4000.0,
"rows" : 1
}
"event" : { "imps" : 1, "age" : "30", "wp" : 4000.0, "rows" : 1 }
}, {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 1,
"age" : "40",
"wp" : 5000.0,
"rows" : 1
}
"event" : { "imps" : 1, "age" : "40", "wp" : 5000.0, "rows" : 1 }
}, {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 1,
"age" : "50",
"wp" : 2000.0,
"rows" : 1
}
"event" : { "imps" : 1, "age" : "50", "wp" : 2000.0, "rows" : 1 }
} ]
```
@ -331,11 +310,7 @@ Now that we've observed our dimensions, we can also filter:
"queryType": "groupBy",
"dataSource": "druidtest",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "gender",
"value": "male"
},
"filter": { "type": "selector", "dimension": "gender", "value": "male" },
"aggregations": [
{"type": "count", "name": "rows"},
{"type": "longSum", "name": "imps", "fieldName": "impressions"},
@ -351,11 +326,7 @@ Which gets us just people aged 40:
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 3,
"wp" : 9000.0,
"rows" : 3
}
"event" : { "imps" : 3, "wp" : 9000.0, "rows" : 3 }
} ]
```
@ -363,4 +334,4 @@ Check out [Filters](Filters.html) for more.
## Learn More ##
You can learn more about querying at [Querying](Querying.html)! Now check out [Booting a production cluster](Booting-a-production-cluster.html)!
You can learn more about querying at [Querying](Querying.html)! Now check out [Booting a production cluster](Booting-a-production-cluster.html)!

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Querying
========
@ -8,86 +8,100 @@ Queries are made using an HTTP REST style request to a [Broker](Broker.html), [C
We start by describing an example query with additional comments that mention possible variations. Query operators are also summarized in a table below.
Example Query “rand”
Example Query "rand"
--------------------
Here is the query in the examples/rand subproject (file is query.body), followed by a commented version of the same.
\`\`\`javascript
```javascript
{
[queryType]() “groupBy”,
[dataSource]() “randSeq”,
[granularity]() “all”,
[dimensions]() [],
[aggregations]() [
{ [type]() “count”, [name]() “rows” },
{ [type]() “doubleSum”, [fieldName]() “events”, [name]() “e” },
{ [type]() “doubleSum”, [fieldName]() “outColumn”, [name]() “randomNumberSum” }
],
[postAggregations]() [{
[type]() “arithmetic”,
[name]() “avg\_random”,
[fn]() “/”,
[fields]() [
{ [type]() “fieldAccess”, [fieldName]() “randomNumberSum” },
{ [type]() “fieldAccess”, [fieldName]() “rows” }
]
}],
[intervals]() [“2012-10-01T00:00/2020-01-01T00”]
"queryType": "groupBy",
"dataSource": "randSeq",
"granularity": "all",
"dimensions": [],
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "events", "name": "e" },
{ "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum" }
],
"postAggregations": [{
"type": "arithmetic",
"name": "avg_random",
"fn": "/",
"fields": [
{ "type": "fieldAccess", "fieldName": "randomNumberSum" },
{ "type": "fieldAccess", "fieldName": "rows" }
]
}],
"intervals": ["2012-10-01T00:00/2020-01-01T00"]
}
\`\`\`
```
This query could be submitted via curl like so (assuming the query object is in a file “query.json”).
This query could be submitted via curl like so (assuming the query object is in a file "query.json").
curl -X POST "http://host:port/druid/v2/?pretty" -H 'content-type: application/json' -d @query.json
```
curl -X POST "http://host:port/druid/v2/?pretty" -H 'content-type: application/json' -d @query.json
```
The “pretty” query parameter gets the results formatted a bit nicer.
The "pretty" query parameter gets the results formatted a bit nicer.
Details of Example Query “rand”
Details of Example Query "rand"
-------------------------------
The queryType JSON field identifies which kind of query operator is to be used, in this case it is groupBy, the most frequently used kind (which corresponds to an internal implementation class GroupByQuery registered as “groupBy”), and it has a set of required fields that are also part of this query. The queryType can also be “search” or “timeBoundary” which have similar or different required fields summarized below:
\`\`\`javascript
The queryType JSON field identifies which kind of query operator is to be used, in this case it is groupBy, the most frequently used kind (which corresponds to an internal implementation class GroupByQuery registered as "groupBy"), and it has a set of required fields that are also part of this query. The queryType can also be "search" or "timeBoundary" which have similar or different required fields summarized below:
```javascript
{
[queryType]() “groupBy”,
\`\`\`
The dataSource JSON field shown next identifies where to apply the query. In this case, randSeq corresponds to the examples/rand/rand\_realtime.spec file schema:
\`\`\`javascript
[dataSource]() “randSeq”,
\`\`\`
The granularity JSON field specifies the bucket size for values. It could be a built-in time interval like “second”, “minute”, “fifteen\_minute”, “thirty\_minute”, “hour” or “day”. It can also be an expression like `{"type": "period", "period":"PT6m"}` meaning “6 minute buckets”. See [Granularities](Granularities.html) for more information on the different options for this field. In this example, it is set to the special value “all” which means [bucket all data points together into the same time bucket]()
\`\`\`javascript
[granularity]() “all”,
\`\`\`
"queryType": "groupBy",
```
The dataSource JSON field shown next identifies where to apply the query. In this case, randSeq corresponds to the examples/rand/rand_realtime.spec file schema:
```javascript
"dataSource": "randSeq",
```
The granularity JSON field specifies the bucket size for values. It could be a built-in time interval like "second", "minute", "fifteen_minute", "thirty_minute", "hour" or "day". It can also be an expression like `{"type": "period", "period":"PT6m"}` meaning "6 minute buckets". See [Granularities](Granularities.html) for more information on the different options for this field. In this example, it is set to the special value "all", which means that all data points are bucketed together into the same time bucket:
```javascript
"granularity": "all",
```
The dimensions JSON field value is an array of zero or more fields as defined in the dataSource spec file or defined in the input records and carried forward. These are used to constrain the grouping. If empty, then one value per time granularity bucket is requested in the groupBy:
\`\`\`javascript
[dimensions]() [],
\`\`\`
A groupBy also requires the JSON field “aggregations” (See [Aggregations](Aggregations.html)), which are applied to the column specified by fieldName and the output of the aggregation will be named according to the value in the “name” field:
\`\`\`javascript
[aggregations]() [
{ [type]() “count”, [name]() “rows” },
{ [type]() “doubleSum”, [fieldName]() “events”, [name]() “e” },
{ [type]() “doubleSum”, [fieldName]() “outColumn”, [name]() “randomNumberSum” }
],
\`\`\`
You can also specify postAggregations, which are applied after data has been aggregated for the current granularity and dimensions bucket. See [Post Aggregations](Post Aggregations.html) for a detailed description. In the rand example, an arithmetic type operation (division, as specified by “fn”) is performed with the result “name” of “avg\_random”. The “fields” field specifies the inputs from the aggregation stage to this expression. Note that identifiers corresponding to “name” JSON field inside the type “fieldAccess” are required but not used outside this expression, so they are prefixed with “dummy” for clarity:
\`\`\`javascript
[postAggregations]() [{
[type]() “arithmetic”,
[name]() “avg\_random”,
[fn]() “/”,
[fields]() [
{ [type]() “fieldAccess”, [fieldName]() “randomNumberSum” },
{ [type]() “fieldAccess”, [fieldName]() “rows” }
]
}],
\`\`\`
```javascript
"dimensions": [],
```
A groupBy also requires the JSON field "aggregations" (See [Aggregations](Aggregations.html)), which are applied to the column specified by fieldName and the output of the aggregation will be named according to the value in the "name" field:
```javascript
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "events", "name": "e" },
{ "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum" }
],
```
You can also specify postAggregations, which are applied after data has been aggregated for the current granularity and dimensions bucket. See [Post Aggregations](Post Aggregations.html) for a detailed description. In the rand example, an arithmetic type operation (division, as specified by "fn") is performed with the result "name" of "avg_random". The "fields" field specifies the inputs from the aggregation stage to this expression. Note that identifiers corresponding to "name" JSON field inside the type "fieldAccess" are required but not used outside this expression, so they are prefixed with "dummy" for clarity:
```javascript
"postAggregations": [{
"type": "arithmetic",
"name": "avg_random",
"fn": "/",
"fields": [
{ "type": "fieldAccess", "fieldName": "randomNumberSum" },
{ "type": "fieldAccess", "fieldName": "rows" }
]
}],
```
The time range(s) of the query; data outside the specified intervals will not be used; this example specifies from October 1, 2012 until January 1, 2020:
\`\`\`javascript
[intervals]() [“2012-10-01T00:00/2020-01-01T00”]
```javascript
"intervals": ["2012-10-01T00:00/2020-01-01T00"]
}
\`\`\`
```
Query Operators
---------------
@ -99,8 +113,8 @@ The following table summarizes query properties.
|timeseries, groupBy, search, timeBoundary|dataSource|query is applied to this data source|yes|
|timeseries, groupBy, search|intervals|range of time series to include in query|yes|
|timeseries, groupBy, search, timeBoundary|context|This is a key-value map that can allow the query to alter some of the behavior of a query. It is primarily used for debugging, for example if you include `"bySegment":true` in the map, you will get results associated with the data segment they came from.|no|
|timeseries, groupBy, search|filter|Specifies the filter (the “WHERE” clause in SQL) for the query. See [Filters](Filters.html)|no|
|timeseries, groupBy, search|granularity|the timestamp granularity to bucket results into (i.e. “hour”). See [Granularities](Granularities.html) for more information.|no|
|timeseries, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|timeseries, groupBy, search|granularity|the timestamp granularity to bucket results into (e.g. "hour"). See [Granularities](Granularities.html) for more information.|no|
|groupBy|dimensions|constrains the groupings; if empty, then one value per time granularity bucket|yes|
|timeseries, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|timeseries, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post Aggregations.html).|yes|

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Realtime
========
@ -23,38 +23,38 @@ Configuration
Realtime nodes take a mix of base server configuration and spec files that describe how to connect, process and expose the realtime feed. See [Configuration](Configuration.html) for information about general server configuration.
### Realtime “specFile”
### Realtime "specFile"
The property `druid.realtime.specFile` has the path of a file (absolute or relative path and file name) with realtime specifications in it. This “specFile” should be a JSON Array of JSON objects like the following:
The property `druid.realtime.specFile` has the path of a file (absolute or relative path and file name) with realtime specifications in it. This "specFile" should be a JSON Array of JSON objects like the following:
<code>
[{
"schema" : { "dataSource":"dataSourceName",
"aggregators":[ {"type":"count", "name":"events"},
{"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
"indexGranularity":"minute",
```json
[{
"schema" : { "dataSource":"dataSourceName",
"aggregators":[ {"type":"count", "name":"events"},
{"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
"indexGranularity":"minute",
"shardSpec" : { "type": "none" } },
"config" : { "maxRowsInMemory" : 500000,
"intermediatePersistPeriod" : "PT10m" },
"firehose" : { "type" : "kafka-0.7.2",
"consumerProps" : { "zk.connect" : "zk_connect_string",
"zk.connectiontimeout.ms" : "15000",
"zk.sessiontimeout.ms" : "15000",
"zk.synctime.ms" : "5000",
"groupid" : "consumer-group",
"fetch.size" : "1048586",
"autooffset.reset" : "largest",
"autocommit.enable" : "false" },
"feed" : "your_kafka_topic",
"parser" : { "timestampSpec" : { "column" : "timestamp", "format" : "iso" },
"data" : { "format" : "json" },
"dimensionExclusions" : ["value"] } },
"plumber" : { "type" : "realtime",
"windowPeriod" : "PT10m",
"segmentGranularity":"hour",
"basePersistDirectory" : "/tmp/realtime/basePersist" }
}]
</code>
"config" : { "maxRowsInMemory" : 500000,
"intermediatePersistPeriod" : "PT10m" },
"firehose" : { "type" : "kafka-0.7.2",
"consumerProps" : { "zk.connect" : "zk_connect_string",
"zk.connectiontimeout.ms" : "15000",
"zk.sessiontimeout.ms" : "15000",
"zk.synctime.ms" : "5000",
"groupid" : "consumer-group",
"fetch.size" : "1048586",
"autooffset.reset" : "largest",
"autocommit.enable" : "false" },
"feed" : "your_kafka_topic",
"parser" : { "timestampSpec" : { "column" : "timestamp", "format" : "iso" },
"data" : { "format" : "json" },
"dimensionExclusions" : ["value"] } },
"plumber" : { "type" : "realtime",
"windowPeriod" : "PT10m",
"segmentGranularity":"hour",
"basePersistDirectory" : "/tmp/realtime/basePersist" }
}]
```
This is a JSON Array so you can give more than one realtime stream to a given node. The number you can put in the same process depends on the exact configuration. In general, it is best to think of each realtime stream handler as requiring 2 threads: 1 thread for data consumption and aggregation, and 1 thread for incremental persists and other background tasks.
@ -68,7 +68,7 @@ This describes the data schema for the output Druid segment. More information ab
|-----|----|-----------|--------|
|aggregators|Array of Objects|The list of aggregators to use to aggregate colliding rows together.|yes|
|dataSource|String|The name of the dataSource that the segment belongs to.|yes|
|indexGranularity|String|The granularity of the data inside the segment. E.g. a value of “minute” will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows.|yes|
|indexGranularity|String|The granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows.|yes|
|segmentGranularity|String|The granularity of the segment as a whole. This is generally larger than the index granularity and describes the rate at which the realtime server will push segments out for historical servers to take over.|yes|
|shardSpec|Object|This describes the shard that is represented by this server. This must be specified properly in order to have multiple realtime nodes indexing the same data stream in a sharded fashion.|no|
@ -94,7 +94,8 @@ Constraints
The following table summarizes constraints between settings in the spec file for the Realtime subsystem.
|*. Name |*. Effect |*. Minimum |*. Recommended |
|Name|Effect|Minimum|Recommended|
|----|------|-------|-----------|
| windowPeriod| when reading an InputRow, events with timestamp older than now minus this window are discarded | time jitter tolerance | use this to reject outliers |
| segmentGranularity| time granularity (minute, hour, day, week, month) for loading data at query time | equal to indexGranularity| more than indexGranularity|
| indexGranularity| time granularity (minute, hour, day, week, month) of indexes | less than segmentGranularity| minute, hour, day, week, month |
@ -115,8 +116,8 @@ Extending the code
Realtime integration is intended to be extended in two ways:
1. Connect to data streams from varied systems ([Firehose](https://github.com/metamx/druid/blob/master/realtime/src/main/java/com/metamx/druid/realtime/FirehoseFactory.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/metamx/druid/blob/master/realtime/src/main/java/com/metamx/druid/realtime/PlumberSchool.java))
1. Connect to data streams from varied systems ([Firehose](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/firehose/FirehoseFactory.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/plumber/PlumberSchool.java))
The expectations are that the former will be very common and something that users of Druid will do on a fairly regular basis. Most users will probably never have to deal with the latter form of customization. Indeed, we hope that all potential use cases can be packaged up as part of Druid proper without requiring proprietary customization.
@ -124,34 +125,34 @@ Given those expectations, adding a firehose is straightforward and completely en
We will do our best to accept contributions from the community of new Firehoses and Plumbers, but we also understand the requirement for being able to plug in your own proprietary implementations. The model for doing this is by embedding the druid code in another project and writing your own `main()` method that initializes a RealtimeNode object and registers your proprietary objects with it.
<code>
public class MyRealtimeMain
{
private static final Logger log = new Logger(MyRealtimeMain.class);
```java
public class MyRealtimeMain
{
private static final Logger log = new Logger(MyRealtimeMain.class);
public static void main(String[] args) throws Exception
{
LogLevelAdjuster.register();
public static void main(String[] args) throws Exception
{
LogLevelAdjuster.register();
Lifecycle lifecycle = new Lifecycle();
Lifecycle lifecycle = new Lifecycle();
lifecycle.addManagedInstance(
RealtimeNode.builder()
.build()
.registerJacksonSubtype(foo.bar.MyFirehose.class)
);
lifecycle.addManagedInstance(
RealtimeNode.builder()
.build()
.registerJacksonSubtype(foo.bar.MyFirehose.class)
);
try {
lifecycle.start();
}
catch (Throwable t) {
log.info(t, "Throwable caught at startup, committing seppuku");
System.exit(2);
}
lifecycle.join();
}
try {
lifecycle.start();
}
</code>
catch (Throwable t) {
log.info(t, "Throwable caught at startup, committing seppuku");
System.exit(2);
}
lifecycle.join();
}
}
```
Pluggable pieces of the system are either handled by a setter on the RealtimeNode object, or they are configuration driven and need to be setup to allow for [Jackson polymorphic deserialization](http://wiki.fasterxml.com/JacksonPolymorphicDeserialization) and registered via the relevant methods on the RealtimeNode object.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Note: It is recommended that the master console is used to configure rules. However, the master node does have HTTP endpoints to programmatically configure rules.
@ -12,33 +12,33 @@ Load rules indicate how many replicants of a segment should exist in a server ti
Interval load rules are of the form:
<code>
{
"type" : "loadByInterval",
"interval" : "2012-01-01/2013-01-01",
"tier" : "hot"
}
</code>
```json
{
"type" : "loadByInterval",
"interval" : "2012-01-01/2013-01-01",
"tier" : "hot"
}
```
type - this should always be “loadByInterval”
interval - A JSON Object representing ISO-8601 Intervals
tier - the configured compute node tier
* `type` - this should always be "loadByInterval"
* `interval` - A JSON Object representing ISO-8601 Intervals
* `tier` - the configured compute node tier
### Period Load Rule
Period load rules are of the form:
<code>
{
"type" : "loadByInterval",
"period" : "P1M",
"tier" : "hot"
}
</code>
```json
{
"type" : "loadByPeriod",
"period" : "P1M",
"tier" : "hot"
}
```
type - this should always be “loadByPeriod”
period - A JSON Object representing ISO-8601 Periods
tier - the configured compute node tier
* `type` - this should always be "loadByPeriod"
* `period` - A JSON Object representing ISO-8601 Periods
* `tier` - the configured compute node tier
The interval of a segment will be compared against the specified period. The rule matches if the period overlaps the interval.
@ -51,15 +51,15 @@ Drop rules indicate when segments should be dropped from the cluster.
Interval drop rules are of the form:
<code>
{
"type" : "dropByInterval",
"interval" : "2012-01-01/2013-01-01"
}
</code>
```json
{
"type" : "dropByInterval",
"interval" : "2012-01-01/2013-01-01"
}
```
type - this should always be “dropByInterval”
interval - A JSON Object representing ISO-8601 Periods
* `type` - this should always be "dropByInterval"
* `interval` - A JSON Object representing ISO-8601 Intervals
A segment is dropped if the interval contains the interval of the segment.
@ -67,14 +67,14 @@ A segment is dropped if the interval contains the interval of the segment.
Period drop rules are of the form:
<code>
{
"type" : "dropByPeriod",
"period" : "P1M"
}
</code>
```json
{
"type" : "dropByPeriod",
"period" : "P1M"
}
```
type - this should always be “dropByPeriod”
period - A JSON Object representing ISO-8601 Periods
* `type` - this should always be "dropByPeriod"
* `period` - A JSON Object representing ISO-8601 Periods
The interval of a segment will be compared against the specified period. The period is from some time in the past to the current time. The rule matches if the period contains the interval.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
A search query returns dimension values that match the search specification.
@ -28,14 +28,14 @@ There are several main parts to a search query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be “search”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
|filter|See [Filters](Filters.html)|no|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
|query|See [SearchQuerySpec](SearchQuerySpec.html).|yes|
|sort|How the results of the search should sorted. Two possible types here are “lexicographic” and “strlen”.|yes|
|sort|How the results of the search should be sorted. Two possible types here are "lexicographic" and "strlen".|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|
The format of the result is:

View File

@ -1,26 +1,28 @@
---
layout: default
layout: doc_page
---
Search query specs define how a “match” is defined between a search value and a dimension value. The available search query specs are:
Search query specs define how a "match" is defined between a search value and a dimension value. The available search query specs are:
InsensitiveContainsSearchQuerySpec
----------------------------------
If any part of a dimension value contains the value specified in this search query spec, regardless of case, a “match” occurs. The grammar is:
If any part of a dimension value contains the value specified in this search query spec, regardless of case, a "match" occurs. The grammar is:
<code>{
"type" : "insensitive_contains",
"value" : "some_value"
}
</code>
```json
{
"type" : "insensitive_contains",
"value" : "some_value"
}
```
FragmentSearchQuerySpec
-----------------------
If any part of a dimension value contains any of the values specified in this search query spec, regardless of case, a “match” occurs. The grammar is:
If any part of a dimension value contains any of the values specified in this search query spec, regardless of case, a "match" occurs. The grammar is:
<code>{
"type" : "fragment",
"values" : ["fragment1", "fragment2"]
}
</code>
```json
{
"type" : "fragment",
"values" : ["fragment1", "fragment2"]
}
```

View File

@ -1,26 +1,28 @@
---
layout: default
layout: doc_page
---
Segment metadata queries return per segment information about:
\* Cardinality of all columns in the segment
\* Estimated byte size for the segment columns in TSV format
\* Interval the segment covers
\* Column type of all the columns in the segment
\* Estimated total segment byte size in TSV format
\* Segment id
<code>{
"queryType":"segmentMetadata",
"dataSource":"sample_datasource",
"intervals":["2013-01-01/2014-01-01"],
}
</code>
* Cardinality of all columns in the segment
* Estimated byte size for the segment columns in TSV format
* Interval the segment covers
* Column type of all the columns in the segment
* Estimated total segment byte size in TSV format
* Segment id
```json
{
"queryType":"segmentMetadata",
"dataSource":"sample_datasource",
"intervals":["2013-01-01/2014-01-01"],
}
```
There are several main parts to a segment metadata query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be “segmentMetadata”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|queryType|This String should always be "segmentMetadata"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|merge|Merge all individual segment metadata results into a single result|no|
@ -28,31 +30,16 @@ There are several main parts to a segment metadata query:
The format of the result is:
<code>[ {
"id" : "some_id",
"intervals" : [ "2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z" ],
"columns" : {
"__time" : {
"type" : "LONG",
"size" : 407240380,
"cardinality" : null
},
"dim1" : {
"type" : "STRING",
"size" : 100000,
"cardinality" : 1944
},
"dim2" : {
"type" : "STRING",
"size" : 100000,
"cardinality" : 1504
},
"metric1" : {
"type" : "FLOAT",
"size" : 100000,
"cardinality" : null
}
},
"size" : 300000
} ]
</code>
```json
[ {
"id" : "some_id",
"intervals" : [ "2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z" ],
"columns" : {
"__time" : { "type" : "LONG", "size" : 407240380, "cardinality" : null },
"dim1" : { "type" : "STRING", "size" : 100000, "cardinality" : 1944 },
"dim2" : { "type" : "STRING", "size" : 100000, "cardinality" : 1504 },
"metric1" : { "type" : "FLOAT", "size" : 100000, "cardinality" : null }
},
"size" : 300000
} ]
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Segments
========
@ -14,36 +14,28 @@ Naming Convention
Identifiers for segments are typically constructed using the segment datasource, interval start time (in ISO 8601 format), interval end time (in ISO 8601 format), and a version. If data is additionally sharded beyond a time range, the segment identifier will also contain a partition number.
An example segment identifier may be:
datasource\_intervalStart\_intervalEnd\_version\_partitionNum
datasource_intervalStart_intervalEnd_version_partitionNum
Segment Components
------------------
A segment is composed of several files, listed below.
### `version.bin`
* `version.bin`
4 bytes representing the current segment version as an integer. E.g., for v9 segments, the version is 0x0, 0x0, 0x0, 0x9
4 bytes representing the current segment version as an integer. E.g., for v9 segments, the version is 0x0, 0x0, 0x0, 0x9
### `meta.smoosh`
* `meta.smoosh`
A file with metadata (filenames and offsets) about the contents of the other `smoosh` files
A file with metadata (filenames and offsets) about the contents of the other `smoosh` files
### `XXXXX.smoosh`
* `XXXXX.smoosh`
There are some number of these files, which are concatenated binary data
There are some number of these files, which are concatenated binary data
The `smoosh` files represent multiple files “smooshed” together in order to minimize the number of file descriptors that must be open to house the data. They are files of up to 2GB in size (to match the limit of a memory mapped ByteBuffer in Java). The `smoosh` files house individual files for each of the columns in the data as well as an `index.drd` file with extra metadata about the segment.
The `smoosh` files represent multiple files "smooshed" together in order to minimize the number of file descriptors that must be open to house the data. They are files of up to 2GB in size (to match the limit of a memory mapped ByteBuffer in Java). The `smoosh` files house individual files for each of the columns in the data as well as an `index.drd` file with extra metadata about the segment.
There is also a special column called `__time` that refers to the time column of the segment. This will hopefully become less and less special as the code evolves, but for now its as special as my Mommy always told me I am.
### `index.drd`
The `index.drd` file houses 3 pieces of data in order
1. The names of all of the columns of the data
2. The names of the “dimensions” of the data (these are the dictionary-encoded, string columns. This is here to support some legacy APIs and will be superfluous in the future)
3. The data interval represented by this segment stored as the start and end timestamps as longs
There is also a special column called `__time` that refers to the time column of the segment. This will hopefully become less and less special as the code evolves, but for now it's as special as my Mommy always told me I am.
Format of a column
------------------

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Note: This feature is highly experimental and only works with spatially indexed dimensions.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Note: This feature is highly experimental.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
This page describes how to use Riak-CS for deep storage instead of S3. We are still setting up some of the peripheral stuff (file downloads, etc.).
@ -223,4 +223,4 @@ This just walks through getting the relevant software installed and running. Yo
/etc/init.d/druid_master start
/etc/init.d/druid_realtime start
/etc/init.d/druid_broker start
/etc/init.d/druid_compute start
/etc/init.d/druid_compute start

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Numerous backend engineers at [Metamarkets](http://www.metamarkets.com) work on Druid full-time. If you have any questions about usage or code, feel free to contact any of us.

View File

@ -1,7 +1,7 @@
---
layout: default
layout: doc_page
---
Tasks are run on workers and always operate on a single datasource. Once an indexer coordinator node accepts a task, a lock is created for the datasource and interval specified in the task. Tasks do not need to explicitly release locks, they are released upon task completion. Tasks may potentially release locks early if they desire. Tasks ids are unique by naming them using UUIDs or the timestamp in which the task was created. Tasks are also part of a “task group”, which is a set of tasks that can share interval locks.
Tasks are run on workers and always operate on a single datasource. Once an indexer coordinator node accepts a task, a lock is created for the datasource and interval specified in the task. Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may potentially release locks early if they desire. Task IDs are made unique by naming them using UUIDs or the timestamp at which the task was created. Tasks are also part of a "task group", which is a set of tasks that can share interval locks.
There are several different types of tasks.

View File

@ -1,6 +1,7 @@
---
layout: default
layout: doc_page
---
YourKit supports the Druid open source projects with its
full-featured Java Profiler.
YourKit, LLC is the creator of innovative and intelligent tools for profiling
@ -8,4 +9,4 @@ Java and .NET applications. Take a look at YourKit's software products:
<a href="http://www.yourkit.com/java/profiler/index.jsp">YourKit Java
Profiler</a> and
<a href="http://www.yourkit.com/.net/profiler/index.jsp">YourKit .NET
Profiler</a>.
Profiler</a>.

View File

@ -1,29 +1,31 @@
---
layout: default
layout: doc_page
---
Time boundary queries return the earliest and latest data points of a data set. The grammar is:
<code>{
"queryType" : "timeBoundary",
"dataSource": "sample_datasource"
}
</code>
```json
{
"queryType" : "timeBoundary",
"dataSource": "sample_datasource"
}
```
There are 3 main parts to a time boundary query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be “timeBoundary”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|queryType|This String should always be "timeBoundary"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|
The format of the result is:
<code>[ {
"timestamp" : "2013-05-09T18:24:00.000Z",
"result" : {
"minTime" : "2013-05-09T18:24:00.000Z",
"maxTime" : "2013-05-09T18:37:00.000Z"
}
} ]
</code>
```json
[ {
"timestamp" : "2013-05-09T18:24:00.000Z",
"result" : {
"minTime" : "2013-05-09T18:24:00.000Z",
"maxTime" : "2013-05-09T18:37:00.000Z"
}
} ]
```

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Timeseries queries
==================
@ -8,81 +8,46 @@ These types of queries take a timeseries query object and return an array of JSO
An example timeseries query object is shown below:
<pre>
<code>
```json
{
[queryType]() “timeseries”,
[dataSource]() “sample\_datasource”,
[granularity]() “day”,
[filter]() {
[type]() “and”,
[fields]() [
{
[type]() “selector”,
[dimension]() “sample\_dimension1”,
[value]() “sample\_value1”
},
{
[type]() “or”,
[fields]() [
{
[type]() “selector”,
[dimension]() “sample\_dimension2”,
[value]() “sample\_value2”
},
{
[type]() “selector”,
[dimension]() “sample\_dimension3”,
[value]() “sample\_value3”
}
]
}
]
},
[aggregations]() [
{
[type]() “longSum”,
[name]() “sample\_name1”,
[fieldName]() “sample\_fieldName1”
},
{
[type]() “doubleSum”,
[name]() “sample\_name2”,
[fieldName]() “sample\_fieldName2”
}
],
[postAggregations]() [
{
[type]() “arithmetic”,
[name]() “sample\_divide”,
[fn]() “/”,
[fields]() [
{
[type]() “fieldAccess”,
[name]() “sample\_name1”,
[fieldName]() “sample\_fieldName1”
},
{
[type]() “fieldAccess”,
[name]() “sample\_name2”,
[fieldName]() “sample\_fieldName2”
}
]
}
],
[intervals]() [
“2012-01-01T00:00:00.000/2012-01-03T00:00:00.000”
]
"queryType": "timeseries",
"dataSource": "sample_datasource",
"granularity": "day",
"filter": {
"type": "and",
"fields": [
{ "type": "selector", "dimension": "sample_dimension1", "value": "sample_value1" },
{ "type": "or",
"fields": [
{ "type": "selector", "dimension": "sample_dimension2", "value": "sample_value2" },
{ "type": "selector", "dimension": "sample_dimension3", "value": "sample_value3" }
]
}
]
},
"aggregations": [
{ "type": "longSum", "name": "sample_name1", "fieldName": "sample_fieldName1" },
{ "type": "doubleSum", "name": "sample_name2", "fieldName": "sample_fieldName2" }
],
"postAggregations": [
{ "type": "arithmetic",
"name": "sample_divide",
"fn": "/",
"fields": [
{ "type": "fieldAccess", "name": "sample_name1", "fieldName": "sample_fieldName1" },
{ "type": "fieldAccess", "name": "sample_name2", "fieldName": "sample_fieldName2" }
]
}
],
"intervals": [ "2012-01-01T00:00:00.000/2012-01-03T00:00:00.000" ]
}
</pre>
</code>
```
There are 7 main parts to a timeseries query:
|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be “timeseries”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|queryType|This String should always be "timeseries"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
|filter|See [Filters](Filters.html)|no|
@ -91,28 +56,17 @@ There are 7 main parts to a timeseries query:
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|
To pull it all together, the above query would return 2 data points, one for each day between 2012-01-01 and 2012-01-03, from the “sample\_datasource” table. Each data point would be the (long) sum of sample\_fieldName1, the (double) sum of sample\_fieldName2 and the (double) the result of sample\_fieldName1 divided by sample\_fieldName2 for the filter set. The output looks like this:
To pull it all together, the above query would return 2 data points, one for each day between 2012-01-01 and 2012-01-03, from the "sample\_datasource" table. Each data point would be the (long) sum of sample\_fieldName1, the (double) sum of sample\_fieldName2 and the (double) result of sample\_fieldName1 divided by sample\_fieldName2 for the filter set. The output looks like this:
<pre>
<code>
```json
[
{
[timestamp]() “2012-01-01T00:00:00.000Z”,
[result]() {
[sample\_name1]() <some_value>,
[sample\_name2]() <some_value>,
[sample\_divide]() <some_value>
}
},
{
[timestamp]() “2012-01-02T00:00:00.000Z”,
[result]() {
[sample\_name1]() <some_value>,
[sample\_name2]() <some_value>,
[sample\_divide]() <some_value>
}
}
{
"timestamp": "2012-01-01T00:00:00.000Z",
"result": { "sample_name1": <some_value>, "sample_name2": <some_value>, "sample_divide": <some_value> }
},
{
"timestamp": "2012-01-02T00:00:00.000Z",
"result": { "sample_name1": <some_value>, "sample_name2": <some_value>, "sample_divide": <some_value> }
}
]
</pre>
</code>
```

View File

@ -1,40 +1,42 @@
---
layout: default
layout: doc_page
---
Greetings! This tutorial will help clarify some core Druid concepts. We will use a realtime dataset and issue some basic Druid queries. If you are ready to explore Druid, and learn a thing or two, read on!
About the data
--------------
The data source well be working with is Wikipedia edits. Each time an edit is made in Wikipedia, an event gets pushed to an IRC channel associated with the language of the Wikipedia page. We scrape IRC channels for several different languages and load this data into Druid.
The data source we'll be working with is Wikipedia edits. Each time an edit is made in Wikipedia, an event gets pushed to an IRC channel associated with the language of the Wikipedia page. We scrape IRC channels for several different languages and load this data into Druid.
Each event has a timestamp indicating the time of the edit (in UTC time), a list of dimensions indicating various metadata about the event (such as information about the user editing the page and where the user resides), and a list of metrics associated with the event (such as the number of characters added and deleted).
Specifically, the data schema looks like so:
Dimensions (things to filter on):
\`\`\`json
“page”
“language”
“user”
“unpatrolled”
“newPage”
“robot”
“anonymous”
“namespace”
“continent”
“country”
“region”
“city”
\`\`\`
```json
"page"
"language"
"user"
"unpatrolled"
"newPage"
"robot"
"anonymous"
"namespace"
"continent"
"country"
"region"
"city"
```
Metrics (things to aggregate over):
\`\`\`json
“count”
“added”
“delta”
“deleted”
\`\`\`
```json
"count"
"added"
"delta"
"deleted"
```
These metrics track the number of characters added, deleted, and changed.
@ -45,120 +47,120 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
### Download a Tarball
Weve built a tarball that contains everything youll need. Youll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.54-bin.tar.gz)
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.54-bin.tar.gz)
Download this file to a directory of your choosing.
You can extract the awesomeness within by issuing:
tar -zxvf druid-services-*-bin.tar.gz
```
tar -zxvf druid-services-*-bin.tar.gz
```
Not too lost so far right? Thats great! If you cd into the directory:
Not too lost so far right? That's great! If you cd into the directory:
cd druid-services-0.5.54
```
cd druid-services-0.5.54
```
You should see a bunch of files:
\* run\_example\_server.sh
\* run\_example\_client.sh
\* LICENSE, config, examples, lib directories
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories
Running Example Scripts
-----------------------
Lets start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
./run_example_server.sh
```
./run_example_server.sh
```
Select “wikipedia”.
Select "wikipedia".
Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.
<code>
2013-07-19 21:54:05,154 INFO [main] com.metamx.druid.realtime.RealtimeNode - Starting Jetty
2013-07-19 21:54:05,154 INFO [main] org.mortbay.log - jetty-6.1.x
2013-07-19 21:54:05,171 INFO [chief-wikipedia] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Expect to run at [2013-07-19T22:03:00.000Z]
2013-07-19 21:54:05,246 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8083
</code>
```
2013-07-19 21:54:05,154 INFO [main] com.metamx.druid.realtime.RealtimeNode - Starting Jetty
2013-07-19 21:54:05,154 INFO [main] org.mortbay.log - jetty-6.1.x
2013-07-19 21:54:05,171 INFO [chief-wikipedia] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Expect to run at [2013-07-19T22:03:00.000Z]
2013-07-19 21:54:05,246 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8083
```
The Druid real-time node ingests events into an in-memory buffer. Periodically, these events will be persisted to disk. If you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our [White Paper](http://static.druid.io/docs/druid.pdf).
Okay, things are about to get real(~~time). To query the real-time node youve spun up, you can issue:
\<pre\>./run\_example\_client.sh\</pre\>
Select “wikipedia” once again. This script issues ]s to the data weve been ingesting. The query looks like this:
\`\`\`json
Okay, things are about to get real-time. To query the real-time node you've spun up, you can issue:
```
./run_example_client.sh
```
Select "wikipedia" once again. This script issues [GroupByQuery](GroupByQuery.html)s to the data we've been ingesting. The query looks like this:
```json
{
[queryType]("groupBy"),
[dataSource]("wikipedia"),
[granularity]("minute"),
[dimensions]([)
“page”
],
[aggregations]([)
{
[type]("count"),
[name]("rows")
},
{
[type]("longSum"),
[fieldName]("edit_count"),
[name]("count")
}
],
[filter]({)
[type]("selector"),
[dimension]("namespace"),
[value]("article")
},
[intervals]([)
“2013-06-01T00:00/2020-01-01T00”
]
"queryType":"groupBy",
"dataSource":"wikipedia",
"granularity":"minute",
"dimensions":[ "page" ],
"aggregations":[
{"type":"count", "name":"rows"},
{"type":"longSum", "fieldName":"edit_count", "name":"count"}
],
"filter":{ "type":"selector", "dimension":"namespace", "value":"article" },
"intervals":[ "2013-06-01T00:00/2020-01-01T00" ]
}
\`\`\`
This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the **dimensions** field: . We are **filtering** via the **“namespace”** dimension, to only look at edits on **“articles”**. Our **aggregations** are what we are calculating: a count of the number of data rows, and a count of the number of edits that have occurred.
```
This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the `dimensions` field: `["page"]`. We are **filtering** via the `namespace` dimension, to only look at edits on `articles`. Our **aggregations** are what we are calculating: a count of the number of data rows, and a count of the number of edits that have occurred.
The result looks something like this:
\`\`\`json
```json
[
{
[version]() “v1”,
[timestamp]() “2013-09-04T21:44:00.000Z”,
[event]() {
[count]() 0,
[page]() “2013\\u201314\_Brentford\_F.C.*season",
[rows]() 1
}
},
{
[version]() "v1",
[timestamp]() "2013-09-04T21:44:00.000Z",
[event]() {
[count]() 0,
[page]() "8e*00e9tape\_du\_Tour\_de\_France\_2013”,
[rows]() 1
}
},
{
[version]() “v1”,
[timestamp]() “2013-09-04T21:44:00.000Z”,
[event]() {
[count]() 0,
[page]() “Agenda\_of\_the\_Tea\_Party\_movement”,
[rows]() 1
}
},
\`\`\`
This groupBy query is a bit complicated and well return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.
{
"version": "v1",
"timestamp": "2013-09-04T21:44:00.000Z",
"event": { "count": 0, "page": "2013\u201314_Brentford_F.C._season", "rows": 1 }
},
{
"version": "v1",
"timestamp": "2013-09-04T21:44:00.000Z",
"event": { "count": 0, "page": "8e_\u00e9tape_du_Tour_de_France_2013", "rows": 1 }
},
{
"version": "v1",
"timestamp": "2013-09-04T21:44:00.000Z",
"event": { "count": 0, "page": "Agenda_of_the_Tea_Party_movement", "rows": 1 }
},
...
```
This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.
## Querying Druid
In your favorite editor, create the file:
\<pre\>time\_boundary\_query.body\</pre\>
```
time_boundary_query.body
```
Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:
\<pre\><code>
```
{
[queryType]() “timeBoundary”,
[dataSource]() “wikipedia”
"queryType": "timeBoundary",
"dataSource": "wikipedia"
}
</code>\</pre\>
The ] is one of the simplest Druid queries. To run the query, you can issue:
\<pre\><code> curl~~X POST http://localhost:8083/druid/v2/?pretty ~~H content-type: application/json~~d ```` time_boundary_query.body</code></pre>
```
The [TimeBoundaryQuery](TimeBoundaryQuery.html) is one of the simplest Druid queries. To run the query, you can issue:
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
```
We get something like this JSON back:
@ -171,186 +173,146 @@ We get something like this JSON back:
}
} ]
```
As you can probably tell, the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity). Let's explore a bit further.
Return to your favorite editor and create the file:
<pre>timeseries_query.body</pre>
```
timeseries_query.body
```
We are going to make a slightly more complicated query, the [TimeseriesQuery](TimeseriesQuery.html). Copy and paste the following into the file:
<pre><code>
```
{
"queryType": "timeseries",
"dataSource": "wikipedia",
"intervals": [
"2010-01-01/2020-01-01"
],
"intervals": [ "2010-01-01/2020-01-01" ],
"granularity": "all",
"aggregations": [
{
"type": "longSum",
"fieldName": "count",
"name": "edit_count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "chars_added"
}
{"type": "longSum", "fieldName": "count", "name": "edit_count"},
{"type": "doubleSum", "fieldName": "added", "name": "chars_added"}
]
}
</code></pre>
```
You are probably wondering, what are these [Granularities](Granularities.html) and [Aggregations](Aggregations.html) things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:
<pre><code>curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d ````timeseries\_query.body</code>
</pre>
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body
```
Once again, you should get a JSON blob of text back with your results, that looks something like this:
\`\`\`json
```json
[ {
“timestamp” : “2013-09-04T21:44:00.000Z”,
“result” : {
“chars\_added” : 312670.0,
“edit\_count” : 733
}
"timestamp" : "2013-09-04T21:44:00.000Z",
"result" : { "chars_added" : 312670.0, "edit_count" : 733 }
} ]
\`\`\`
```
If you issue the query again, you should notice your results updating.
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
If you loudly exclaimed “we can change granularity to minute”, you are absolutely correct! We can specify different granularities to bucket our results, like so:
If you loudly exclaimed "we can change granularity to minute", you are absolutely correct! We can specify different granularities to bucket our results, like so:
<code>
{
"queryType": "timeseries",
"dataSource": "wikipedia",
"intervals": [
"2010-01-01/2020-01-01"
],
"granularity": "minute",
"aggregations": [
{
"type": "longSum",
"fieldName": "count",
"name": "edit_count"
},
{
"type": "doubleSum",
"fieldName": "added",
"name": "chars_added"
}
]
}
</code>
```
{
"queryType": "timeseries",
"dataSource": "wikipedia",
"intervals": [ "2010-01-01/2020-01-01" ],
"granularity": "minute",
"aggregations": [
{"type": "longSum", "fieldName": "count", "name": "edit_count"},
{"type": "doubleSum", "fieldName": "added", "name": "chars_added"}
]
}
```
This gives us something like the following:
\`\`\`json
```json
[
{
“timestamp” : “2013-09-04T21:44:00.000Z”,
“result” : {
“chars\_added” : 30665.0,
“edit\_count” : 128
}
}, {
“timestamp” : “2013-09-04T21:45:00.000Z”,
“result” : {
“chars\_added” : 122637.0,
“edit\_count” : 167
}
}, {
“timestamp” : “2013-09-04T21:46:00.000Z”,
“result” : {
“chars\_added” : 78938.0,
“edit\_count” : 159
}
"timestamp" : "2013-09-04T21:44:00.000Z",
"result" : { "chars_added" : 30665.0, "edit_count" : 128 }
},
{
"timestamp" : "2013-09-04T21:45:00.000Z",
"result" : { "chars_added" : 122637.0, "edit_count" : 167 }
},
{
"timestamp" : "2013-09-04T21:46:00.000Z",
"result" : { "chars_added" : 78938.0, "edit_count" : 159 }
},
\`\`\`
...
]
```
Solving a Problem
-----------------
One of Druids main powers is to provide answers to problems, so lets pose a problem. What if we wanted to know what the top pages in the US are, ordered by the number of edits over the last few minutes youve been going through this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group by results by dimension value and somehow sort those results and it turns out we can!
One of Druid's main powers is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top pages in the US are, ordered by the number of edits over the last few minutes you've been going through this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group results by dimension value and somehow sort those results... and it turns out we can!
Lets create the file:
Let's create the file:
group_by_query.body</pre>
and put the following in there:
<pre><code>
{
"queryType": "groupBy",
"dataSource": "wikipedia",
"granularity": "all",
"dimensions": [
"page"
],
"orderBy": {
"type": "default",
"columns": [
{
"dimension": "edit_count",
"direction": "DESCENDING"
}
],
"limit": 10
},
"aggregations": [
{
"type": "longSum",
"fieldName": "count",
"name": "edit_count"
}
],
"filter": {
"type": "selector",
"dimension": "country",
"value": "United States"
},
"intervals": [
"2012-10-01T00:00/2020-01-01T00"
]
}
</code>
```
group_by_query.body
```
Woah! Our query just got a way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects weve introduced to our query can help define the format of our results and provide an answer to our question.
and put the following in there:
```
{
"queryType": "groupBy",
"dataSource": "wikipedia",
"granularity": "all",
"dimensions": [ "page" ],
"orderBy": {
"type": "default",
"columns": [ { "dimension": "edit_count", "direction": "DESCENDING" } ],
"limit": 10
},
"aggregations": [
{"type": "longSum", "fieldName": "count", "name": "edit_count"}
],
"filter": { "type": "selector", "dimension": "country", "value": "United States" },
"intervals": ["2012-10-01T00:00/2020-01-01T00"]
}
```
Whoa! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
If you issue the query:
<code>curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body</code>
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body
```
You should see an answer to our question. As an example, some results are shown below:
\`\`\`json
```json
[
{
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“page” : “RTC\_Transit”,
“edit\_count” : 6
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“page” : “List\_of\_Deadly\_Women\_episodes”,
“edit\_count” : 4
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“page” : “User\_talk:David\_Biddulph”,
“edit\_count” : 4
}
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : { "page" : "RTC_Transit", "edit_count" : 6 }
},
{
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : { "page" : "List_of_Deadly_Women_episodes", "edit_count" : 4 }
},
{
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : { "page" : "User_talk:David_Biddulph", "edit_count" : 4 }
},
\`\`\`
...
```
Feel free to tweak other query parameters to answer other questions you may have about the data.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
Welcome back! In our first [tutorial](https://github.com/metamx/druid/wiki/Tutorial%3A-A-First-Look-at-Druid), we introduced you to the most basic Druid setup: a single realtime node. We streamed in some data and queried it. Realtime nodes collect very recent data and periodically hand that data off to the rest of the Druid cluster. Some questions about the architecture must naturally come to mind. What does the rest of Druid cluster look like? How does Druid load available static data?
@ -14,6 +14,7 @@ If you followed the first tutorial, you should already have Druid downloaded. If
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.5.54-bin.tar.gz)
and untar the contents within by issuing:
```bash
tar -zxvf druid-services-*-bin.tar.gz
cd druid-services-*
@ -32,15 +33,18 @@ For deep storage, we have made a public S3 bucket (static.druid.io) available wh
1. If you don't already have it, download MySQL Community Server here: [http://dev.mysql.com/downloads/mysql/](http://dev.mysql.com/downloads/mysql/)
2. Install MySQL
3. Create a druid user and database
```bash
mysql -u root
```
```sql
GRANT ALL ON druid.* TO 'druid'@'localhost' IDENTIFIED BY 'diurd';
CREATE database druid;
```
### Setting up Zookeeper ###
```bash
curl http://www.motorlogy.com/apache/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
tar xzf zookeeper-3.4.5.tar.gz
@ -55,6 +59,7 @@ cd ..
Similar to the first tutorial, the data we will be loading is based on edits that have occurred on Wikipedia. Every time someone edits a page in Wikipedia, metadata is generated about the editor and edited page. Druid collects each individual event and packages them together in a container known as a [segment](https://github.com/metamx/druid/wiki/Segments). Segments contain data over some span of time. We've prebuilt a segment for this tutorial and will cover making your own segments in other [pages](https://github.com/metamx/druid/wiki/Loading-Your-Data). The segment we are going to work with has the following format:
Dimensions (things to filter on):
```json
"page"
"language"
@ -71,6 +76,7 @@ Dimensions (things to filter on):
```
Metrics (things to aggregate over):
```json
"count"
"added"
@ -98,7 +104,7 @@ To create the master config file:
mkdir config/master
```
Under the directory we just created, create the file ```runtime.properties``` with the following contents:
Under the directory we just created, create the file `runtime.properties` with the following contents:
```
druid.host=127.0.0.1:8082
@ -146,7 +152,8 @@ To create the compute config file:
mkdir config/compute
```
Under the directory we just created, create the file ```runtime.properties``` with the following contents:
Under the directory we just created, create the file `runtime.properties` with the following contents:
```
druid.host=127.0.0.1:8081
druid.port=8081
@ -219,67 +226,17 @@ To start the broker node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath lib/*:config/broker com.metamx.druid.http.BrokerMain
```
<!--
### Optional: Start a Realtime Node ###
```
druid.host=127.0.0.1:8083
druid.port=8083
druid.service=realtime
# logging
com.metamx.emitter.logging=true
com.metamx.emitter.logging.level=info
# zk
druid.zk.service.host=localhost
druid.zk.paths.base=/druid
druid.zk.paths.discoveryPath=/druid/discoveryPath
# processing
druid.processing.buffer.sizeBytes=10000000
# schema
druid.realtime.specFile=realtime.spec
# aws
com.metamx.aws.accessKey=dummy_access_key
com.metamx.aws.secretKey=dummy_secret_key
# db
druid.database.segmentTable=segments
druid.database.user=druid
druid.database.password=diurd
druid.database.connectURI=jdbc:mysql://localhost:3306/druid
druid.database.ruleTable=rules
druid.database.configTable=config
# Path on local FS for storage of segments; dir will be created if needed
druid.paths.indexCache=/tmp/druid/indexCache
# handoff
druid.pusher.s3.bucket=dummy_s3_bucket
druid.pusher.s3.baseKey=dummy_key
```
To start the realtime node:
```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath services/target/druid-services-*-selfcontained.jar:config/realtime com.metamx.druid.realtime.RealtimeMain
```
-->
## Loading the Data ##
The MySQL dependency we introduced earlier on contains a 'segments' table that contains entries for segments that should be loaded into our cluster. The Druid master compares this table with segments that already exist in the cluster to determine what should be loaded and dropped. To load our wikipedia segment, we need to create an entry in our MySQL segment table.
Usually, when new segments are created, these MySQL entries are created directly so you never have to do this by hand. For this tutorial, we can do this manually by going back into MySQL and issuing:
```
``` sql
use druid;
```
``
INSERT INTO segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');
``
```
If you look in your master node logs, you should, after a maximum of a minute or so, see logs of the following form:
@ -294,9 +251,9 @@ When the segment completes downloading and ready for queries, you should see the
2013-08-08 22:48:41,959 INFO [ZkCoordinator-0] com.metamx.druid.coordination.BatchDataSegmentAnnouncer - Announcing segment[wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z] at path[/druid/segments/127.0.0.1:8081/2013-08-08T22:48:41.959Z]
```
At this point, we can query the segment. For more information on querying, see this[link](https://github.com/metamx/druid/wiki/Querying).
At this point, we can query the segment. For more information on querying, see this [link](https://github.com/metamx/druid/wiki/Querying).
## Next Steps ##
Now that you have an understanding of what the Druid cluster looks like, why not load some of your own data?
Check out the [Loading Your Own Data](https://github.com/metamx/druid/wiki/Loading-Your-Data) section for more info!
Check out the [Loading Your Own Data](https://github.com/metamx/druid/wiki/Loading-Your-Data) section for more info!

View File

@ -1,345 +1,307 @@
---
layout: default
layout: doc_page
---
Greetings! This tutorial will help clarify some core Druid concepts. We will use a realtime dataset and issue some basic Druid queries. If you are ready to explore Druid, and learn a thing or two, read on!
About the data
--------------
The data source well be working with is the Bit.ly USA Government website statistics stream. You can see the stream [here](http://developer.usa.gov/1usagov), and read about the stream [here](http://www.usa.gov/About/developer-resources/1usagov.shtml) . This is a feed of json data that gets updated whenever anyone clicks a bit.ly shortened USA.gov website. A typical event might look something like this:
\`\`\`json
The data source we'll be working with is the Bit.ly USA Government website statistics stream. You can see the stream [here](http://developer.usa.gov/1usagov), and read about the stream [here](http://www.usa.gov/About/developer-resources/1usagov.shtml) . This is a feed of json data that gets updated whenever anyone clicks a bit.ly shortened USA.gov website. A typical event might look something like this:
```json
{
[user\_agent]() “Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)”,
[country]() “US”,
[known\_user]() 1,
[timezone]() “America/New\_York”,
[geo\_region]() “DC”,
[global\_bitly\_hash]() “17ctAFs”,
[encoding\_user\_bitly\_hash]() “17ctAFr”,
[encoding\_user\_login]() “senrubiopress”,
[aaccept\_language]() “en-US”,
[short\_url\_cname]() “1.usa.gov”,
[referring\_url]() “http://t.co/4Av4NUFAYq”,
[long\_url]() “http://www.rubio.senate.gov/public/index.cfm/fighting-for-florida?ID=c8357d12-9da8-4e9d-b00d-7168e1bf3599”,
[timestamp]() 1372190407,
[timestamp of time hash was created]() 1372190097,
[city]() “Washington”,
[latitude\_longitude]() [
38.893299,
~~77.014603
]
"user_agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
"country": "US",
"known_user": 1,
"timezone": "America/New_York",
"geo_region": "DC",
"global_bitly_hash": "17ctAFs",
"encoding_user_bitly_hash": "17ctAFr",
"encoding_user_login": "senrubiopress",
"aaccept_language": "en-US",
"short_url_cname": "1.usa.gov",
"referring_url": "http://t.co/4Av4NUFAYq",
"long_url": "http://www.rubio.senate.gov/public/index.cfm/fighting-for-florida?ID=c8357d12-9da8-4e9d-b00d-7168e1bf3599",
"timestamp": 1372190407,
"timestamp of time hash was created": 1372190097,
"city": "Washington",
"latitude_longitude": [ 38.893299, -77.014603 ]
}
\`\`\`
The “known\_user” field is always 1 or 0. It is 1 if the user is known to the server, and 0 otherwise. We will use this field extensively in this demo.
```
The "known_user" field is always 1 or 0. It is 1 if the user is known to the server, and 0 otherwise. We will use this field extensively in this demo.
## Setting Up
There are two ways to setup Druid: download a tarball, or ]. You only need to do one of these.
There are two ways to setup Druid: download a tarball, or [Build From Source](Build-From-Source.html). You only need to do one of these.
### Download a Tarball
Weve built a tarball that contains everything youll need. Youll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.50-bin.tar.gz)
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.50-bin.tar.gz)
Download this file to a directory of your choosing.
You can extract the awesomeness within by issuing:
\<pre\>tar~~zxvf druid-services~~**~~bin.tar.gz\</pre\>
Not too lost so far right? Thats great! If you cd into the directory:
\<pre\>cd druid-services-0.5.50\</pre\>
```
tar zxvf druid-services-*-bin.tar.gz
```
Not too lost so far right? That's great! If you cd into the directory:
```
cd druid-services-0.5.50
```
You should see a bunch of files:
\* run\_example\_server.sh
\* run\_example\_client.sh
\* LICENSE, config, examples, lib directories
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories
## Running Example Scripts
Lets start doing stuff. You can start a Druid ] node by issuing:
\<pre\>./run\_example\_server.sh\</pre\>
Select “webstream”.
Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
```
./run_example_server.sh
```
Select "webstream".
Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.
\<pre\><code>
```
2013-07-19 21:54:05,154 INFO com.metamx.druid.realtime.RealtimeNode - Starting Jetty
2013-07-19 21:54:05,154 INFO org.mortbay.log - jetty-6.1.x
2013-07-19 21:54:05,171 INFO com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Expect to run at
2013-07-19 21:54:05,246 INFO org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8083
</code>\</pre\>
```
The Druid real-time node ingests events into an in-memory buffer. Periodically, these events will be persisted to disk. If you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our [White Paper](http://static.druid.io/docs/druid.pdf).
Okay, things are about to get real. To query the real-time node youve spun up, you can issue:
\<pre\>./run\_example\_client.sh\</pre\>
Select “webstream” once again. This script issues ]s to the data weve been ingesting. The query looks like this:
\`\`\`json
Okay, things are about to get real. To query the real-time node you've spun up, you can issue:
```
./run_example_client.sh
```
Select "webstream" once again. This script issues [GroupByQuery](GroupByQuery.html)s to the data we've been ingesting. The query looks like this:
```json
{
[queryType]() “groupBy”,
[dataSource]() “webstream”,
[granularity]() “minute”,
[dimensions]() [
“timezone”
],
[aggregations]() [
{
[type]() “count”,
[name]() “rows”
},
{
[type]() “doubleSum”,
[fieldName]() “known\_users”,
[name]() “known\_users”
}
],
[filter]() {
[type]() “selector”,
[dimension]() “country”,
[value]() “US”
},
[intervals]() [
“2013-06-01T00:00/2020-01-01T00”
]
"queryType": "groupBy",
"dataSource": "webstream",
"granularity": "minute",
"dimensions": [ "timezone" ],
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
],
"filter": { "type": "selector", "dimension": "country", "value": "US" },
"intervals": [ "2013-06-01T00:00/2020-01-01T00" ]
}
\`\`\`
This is a****groupBy**\* query, which you may be familiar with from SQL. We are grouping, or aggregating, via the **dimensions** field: . We are **filtering** via the **“country”** dimension, to only look at website hits in the US. Our **aggregations** are what we are calculating: a row count, and the sum of the number of known users in our data.
```
This is a `groupBy` query, which you may be familiar with from SQL. We are grouping, or aggregating, via the `dimensions` field: `["timezone"]`. We are **filtering** via the `"country"` dimension, to only look at website hits in the US. Our **aggregations** are what we are calculating: a row count, and the sum of the number of known users in our data.
The result looks something like this:
\`\`\`json
```json
[
{
[version]() “v1”,
[timestamp]() “2013-07-18T19:39:00.000Z”,
[event]() {
[timezone]() “America/Chicago”,
[known\_users]() 10,
[rows]() 15
}
},
{
[version]() “v1”,
[timestamp]() “2013-07-18T19:39:00.000Z”,
[event]() {
[timezone]() “America/Los\_Angeles”,
[known\_users]() 0,
[rows]() 3
}
},
\`\`\`
This groupBy query is a bit complicated and well return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.
{
"version": "v1",
"timestamp": "2013-07-18T19:39:00.000Z",
"event": { "timezone": "America/Chicago", "known_users": 10, "rows": 15 }
},
{
"version": "v1",
"timestamp": "2013-07-18T19:39:00.000Z",
"event": { "timezone": "America/Los_Angeles", "known_users": 0, "rows": 3 }
},
...
```
This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.
## Querying Druid
In your favorite editor, create the file:
\<pre\>time\_boundary\_query.body\</pre\>
```
time_boundary_query.body
```
Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:
\<pre\><code>
```
{
[queryType]() “timeBoundary”,
[dataSource]() “webstream”
"queryType": "timeBoundary",
"dataSource": "webstream"
}
</code>\</pre\>
The ] is one of the simplest Druid queries. To run the query, you can issue:
\<pre\><code> curl~~X POST http://localhost:8083/druid/v2/?pretty ~~H content-type: application/json~~d ```` time_boundary_query.body</code></pre>
```
The [TimeBoundaryQuery](TimeBoundaryQuery.html) is one of the simplest Druid queries. To run the query, you can issue:
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
```
We get something like this JSON back:
```json
[
{
"timestamp": "2013-07-18T19:39:00.000Z",
"result": {
"minTime": "2013-07-18T19:39:00.000Z",
"maxTime": "2013-07-18T19:46:00.000Z"
}
}
{
"timestamp": "2013-07-18T19:39:00.000Z",
"result": {
"minTime": "2013-07-18T19:39:00.000Z",
"maxTime": "2013-07-18T19:46:00.000Z"
}
}
]
```
As you can probably tell, the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity). Let's explore a bit further.
Return to your favorite editor and create the file:
<pre>timeseries_query.body</pre>
```
timeseries_query.body
```
We are going to make a slightly more complicated query, the [TimeseriesQuery](TimeseriesQuery.html). Copy and paste the following into the file:
<pre><code>
```json
{
"queryType": "timeseries",
"dataSource": "webstream",
"intervals": [
"2010-01-01/2020-01-01"
],
"granularity": "all",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "doubleSum",
"fieldName": "known_users",
"name": "known_users"
}
]
"queryType": "timeseries",
"dataSource": "webstream",
"intervals": [ "2010-01-01/2020-01-01" ],
"granularity": "all",
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
]
}
</code></pre>
```
You are probably wondering, what are these [Granularities](Granularities.html) and [Aggregations](Aggregations.html) things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:
<pre><code>curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d ````timeseries\_query.body</code>
</pre>
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body
```
Once again, you should get a JSON blob of text back with your results, that looks something like this:
\`\`\`json
```json
[
{
“timestamp” : “2013-07-18T19:39:00.000Z”,
“result” : {
“known\_users” : 787.0,
“rows” : 2004
}
}
{
"timestamp" : "2013-07-18T19:39:00.000Z",
"result" : { "known_users" : 787.0, "rows" : 2004 }
}
]
\`\`\`
```
If you issue the query again, you should notice your results updating.
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
If you loudly exclaimed “we can change granularity to minute”, you are absolutely correct! We can specify different granularities to bucket our results, like so:
If you loudly exclaimed "we can change granularity to minute", you are absolutely correct! We can specify different granularities to bucket our results, like so:
<code>
{
"queryType": "timeseries",
"dataSource": "webstream",
"intervals": [
"2010-01-01/2020-01-01"
],
"granularity": "minute",
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "doubleSum",
"fieldName": "known_users",
"name": "known_users"
}
]
}
</code>
```json
{
"queryType": "timeseries",
"dataSource": "webstream",
"intervals": [ "2010-01-01/2020-01-01" ],
"granularity": "minute",
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
]
}
```
This gives us something like the following:
\`\`\`json
```json
[
{
[timestamp]() “2013-07-18T19:39:00.000Z”,
[result]() {
[known\_users]() 33,
[rows]() 76
}
},
{
[timestamp]() “2013-07-18T19:40:00.000Z”,
[result]() {
[known\_users]() 105,
[rows]() 221
}
},
{
[timestamp]() “2013-07-18T19:41:00.000Z”,
[result]() {
[known\_users]() 53,
[rows]() 167
}
},
\`\`\`
{
"timestamp": "2013-07-18T19:39:00.000Z",
"result": { "known_users": 33, "rows": 76 }
},
{
"timestamp": "2013-07-18T19:40:00.000Z",
"result": { "known_users": 105, "rows": 221 }
},
{
"timestamp": "2013-07-18T19:41:00.000Z",
"result": { "known_users": 53, "rows": 167 }
},
...
```
Solving a Problem
-----------------
One of Druids main powers is to provide answers to problems, so lets pose a problem. What if we wanted to know what the top states in the US are, ordered by the number of visits by known users over the last few minutes? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group by results by dimension value and somehow sort those results… and it turns out we can!
One of Druid's main powers is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top states in the US are, ordered by the number of visits by known users over the last few minutes? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group our results by dimension value and somehow sort those results… and it turns out we can!
Lets create the file:
Let's create the file:
group_by_query.body</pre>
and put the following in there:
<pre><code>
{
"queryType": "groupBy",
"dataSource": "webstream",
"granularity": "all",
"dimensions": [
"geo_region"
```
group_by_query.body
```
and put the following in there:
```
{
"queryType": "groupBy",
"dataSource": "webstream",
"granularity": "all",
"dimensions": [ "geo_region" ],
"orderBy": {
"type": "default",
"columns": [
{ "dimension": "known_users", "direction": "DESCENDING" }
],
"orderBy": {
"type": "default",
"columns": [
{
"dimension": "known_users",
"direction": "DESCENDING"
}
],
"limit": 10
},
"aggregations": [
{
"type": "count",
"name": "rows"
},
{
"type": "doubleSum",
"fieldName": "known_users",
"name": "known_users"
}
],
"filter": {
"type": "selector",
"dimension": "country",
"value": "US"
},
"intervals": [
"2012-10-01T00:00/2020-01-01T00"
]
}
</code>
"limit": 10
},
"aggregations": [
{ "type": "count", "name": "rows" },
{ "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
],
"filter": { "type": "selector", "dimension": "country", "value": "US" },
"intervals": [ "2012-10-01T00:00/2020-01-01T00" ]
}
```
Woah! Our query just got a way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects weve introduced to our query can help define the format of our results and provide an answer to our question.
Whoa! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
If you issue the query:
<code>curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body</code>
```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body
```
You should see an answer to our question. For my stream, it looks like this:
\`\`\`json
```json
[
{
[version]() “v1”,
[timestamp]() “2012-10-01T00:00:00.000Z”,
[event]() {
[geo\_region]() “RI”,
[known\_users]() 359,
[rows]() 143
}
},
{
[version]() “v1”,
[timestamp]() “2012-10-01T00:00:00.000Z”,
[event]() {
[geo\_region]() “NY”,
[known\_users]() 187,
[rows]() 322
}
},
{
[version]() “v1”,
[timestamp]() “2012-10-01T00:00:00.000Z”,
[event]() {
[geo\_region]() “CA”,
[known\_users]() 145,
[rows]() 466
}
},
{
[version]() “v1”,
[timestamp]() “2012-10-01T00:00:00.000Z”,
[event]() {
[geo\_region]() “IL”,
[known\_users]() 121,
[rows]() 185
}
},
\`\`\`
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": { "geo_region": "RI", "known_users": 359, "rows": 143 }
},
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": { "geo_region": "NY", "known_users": 187, "rows": 322 }
},
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": { "geo_region": "CA", "known_users": 145, "rows": 466 }
},
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": { "geo_region": "IL", "known_users": 121, "rows": 185 }
},
...
```
Feel free to tweak other query parameters to answer other questions you may have about the data.

View File

@ -1,329 +0,0 @@
---
layout: default
---
Greetings! We see you've taken an interest in Druid. That's awesome! Hopefully this tutorial will help clarify some core Druid concepts. We will go through one of the Real-time [Examples](Examples.html), and issue some basic Druid queries. The data source we'll be working with is the [Twitter spritzer stream](https://dev.twitter.com/docs/streaming-apis/streams/public). If you are ready to explore Druid, brave its challenges, and maybe learn a thing or two, read on!
Setting Up
----------
There are two ways to setup Druid: download a tarball, or build it from source.
### Download a Tarball
Weve built a tarball that contains everything youll need. Youll find it [here](http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz).
Download this bad boy to a directory of your choosing.
You can extract the awesomeness within by issuing:
tar -zxvf druid-services-0.4.6.tar.gz
Not too lost so far right? Thats great! If you cd into the directory:
cd druid-services-0.4.6-SNAPSHOT
You should see a bunch of files:
\* run\_example\_server.sh
\* run\_example\_client.sh
\* LICENSE, config, examples, lib directories
### Clone and Build from Source
The other way to setup Druid is from source via git. To do so, run these commands:
\`\`\`
git clone git@github.com:metamx/druid.git
cd druid
git checkout druid-0.4.32-branch
./build.sh
\`\`\`
You should see a bunch of files:
\`\`\`
DruidCorporateCLA.pdf README common examples indexer pom.xml server
DruidIndividualCLA.pdf build.sh doc group\_by.body install publications services
LICENSE client eclipse\_formatting.xml index-common merger realtime
\`\`\`
You can find the example executables in the examples/bin directory:
\* run\_example\_server.sh
\* run\_example\_client.sh
Running Example Scripts
-----------------------
Lets start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
./run_example_server.sh
Select “twitter”.
Youll need to register a new application with the twitter API, which only takes a minute. Go to [https://twitter.com/oauth\_clients/new](https://twitter.com/oauth_clients/new) and fill out the form and submit. Dont worry, the home page and callback url can be anything. This will generate keys for the Twitter example application. Take note of the values for consumer key/secret and access token/secret.
Enter your credentials when prompted.
Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below. If you see crazy exceptions, you probably typed in your login information incorrectly.
<code>
2013-05-17 23:04:40,934 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8080
2013-05-17 23:04:40,935 INFO [main] com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler - Invoking start method[public void com.metamx.druid.http.FileRequestLogger.start()] on object[com.metamx.druid.http.FileRequestLogger@42bb0406].
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Connection established.
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] druid.examples.twitter.TwitterSpritzerFirehoseFactory - Connected_to_Twitter
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Receiving status stream.
</code>
Periodically, youll also see messages of the form:
<code>
2013-05-17 23:04:59,793 INFO [chief-twitterstream] druid.examples.twitter.TwitterSpritzerFirehoseFactory - nextRow() has returned 1,000 InputRows
</code>
These messages indicate you are ingesting events. The Druid real time-node ingests events in an in-memory buffer. Periodically, these events will be persisted to disk. Persisting to disk generates a whole bunch of logs:
<code>
2013-05-17 23:06:40,918 INFO [chief-twitterstream] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Submitting persist runnable for dataSource[twitterstream]
2013-05-17 23:06:40,920 INFO [twitterstream-incremental-persist] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - DataSource[twitterstream], Interval[2013-05-17T23:00:00.000Z/2013-05-18T00:00:00.000Z], persisting Hydrant[FireHydrant{index=com.metamx.druid.index.v1.IncrementalIndex@126212dd, queryable=com.metamx.druid.index.IncrementalIndexSegment@64c47498, count=0}]
2013-05-17 23:06:40,937 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting persist for interval[2013-05-17T23:00:00.000Z/2013-05-17T23:07:00.000Z], rows[4,666]
2013-05-17 23:06:41,039 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed index.drd in 11 millis.
2013-05-17 23:06:41,070 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed dim conversions in 31 millis.
2013-05-17 23:06:41,275 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.CompressedPools - Allocating new chunkEncoder[1]
2013-05-17 23:06:41,332 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed walk through of 4,666 rows in 262 millis.
2013-05-17 23:06:41,334 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[htags] with cardinality[634]
2013-05-17 23:06:41,381 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[htags] in 49 millis.
2013-05-17 23:06:41,382 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[lang] with cardinality[19]
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[lang] in 17 millis.
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[utc_offset] with cardinality[32]
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[utc_offset] in 15 millis.
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed inverted.drd in 81 millis.
2013-05-17 23:06:41,425 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexIO$DefaultIndexIOHandler - Converting v8[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] to v9[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0]
2013-05-17 23:06:41,426 INFO [twitterstream-incremental-persist]
... ETC
</code>
The logs are about building different columns, probably not the most exciting stuff (they might as well be in Vulcan) if you are learning about Druid for the first time. Nevertheless, if you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our [White Paper](http://static.druid.io/docs/druid.pdf).
Okay, things are about to get real (-time). To query the real-time node you've spun up, you can issue:
\<pre\>./run\_example\_client.sh\</pre\>
Select “twitter” once again. This script issues ]s to the twitter data weve been ingesting. The query looks like this:
\`\`\`json
{
[queryType]() “groupBy”,
[dataSource]() “twitterstream”,
[granularity]() “all”,
[dimensions]() ,
[aggregations]([)
{ [type]() “count”, [name]() “rows”},
{ [type]() “doubleSum”, [fieldName]() “tweets”, [name]() “tweets”}
],
[filter]() { [type]() “selector”, [dimension]() “lang”, [value]() “en” },
[intervals](["2012-10-01T00:00/2020-01-01T00"])
}
\`\`\`
This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the **dimensions** field: . We are **filtering** via the **“lang”** dimension, to only look at english tweets. Our **aggregations** are what we are calculating: a row count, and the sum of the tweets in our data.
The result looks something like this:
\`\`\`json
[
{
[version]() “v1”,
[timestamp]() “2012-10-01T00:00:00.000Z”,
[event]() {
[utc\_offset]() “~~10800",
[tweets]() 90,
[lang]() "en",
[rows]() 81
}
},
{
[version]() "v1",
[timestamp]() "2012-10-01T00:00:00.000Z",
[event]() {
[utc\_offset]() "~~14400”,
[tweets]() 177,
[lang]() “en”,
[rows]() 154
}
},
\`\`\`
This data, plotted in a time series/distribution, looks something like this:
![Timezone / Tweets Scatter Plot](http://metamarkets.com/wp-content/uploads/2013/06/tweets_timezone_offset.png "Timezone / Tweets Scatter Plot")
This groupBy query is a bit complicated and well return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.
h2. Querying Druid
In your favorite editor, create the file:
\<pre\>time\_boundary\_query.body\</pre\>
Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:
\<pre\><code>
</code>\</pre\>
The [TimeBoundaryQuery](TimeBoundaryQuery.html) is one of the simplest Druid queries. To run the query, you can issue:
\<pre\><code> curl~~X POST http://localhost:8080/druid/v2/?pretty ~~H content-type: application/json~~d ```` time_boundary_query.body</code></pre>
We get something like this JSON back:
```json
[ {
"timestamp" : "2013-06-10T19:09:00.000Z",
"result" : {
"minTime" : "2013-06-10T19:09:00.000Z",
"maxTime" : "2013-06-10T20:50:00.000Z"
}
} ]
```
That's the result. What information do you think the result is conveying?
...
If you said the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity), you are absolutely correct. I can see you are a person legitimately interested in learning about Druid. Let's explore a bit further.
Return to your favorite editor and create the file:
<pre>timeseries_query.body</pre>
We are going to make a slightly more complicated query, the [TimeseriesQuery](TimeseriesQuery.html). Copy and paste the following into the file:
<pre><code>{
"queryType":"timeseries",
"dataSource":"twitterstream",
"intervals":["2010-01-01/2020-01-01"],
"granularity":"all",
"aggregations":[
{ "type": "count", "name": "rows"},
{ "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
]
}
</code></pre>
You are probably wondering, what are these [Granularities](Granularities.html) and [Aggregations](Aggregations.html) things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:
<pre><code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d ````timeseries\_query.body</code>
</pre>
Once again, you should get a JSON blob of text back with your results, that looks something like this:
\`\`\`json
[ {
“timestamp” : “2013-06-10T19:09:00.000Z”,
“result” : {
“tweets” : 358562.0,
“rows” : 272271
}
} ]
\`\`\`
If you issue the query again, you should notice your results updating.
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
If you loudly exclaimed “we can change granularity to minute”, you are absolutely correct again! We can specify different granularities to bucket our results, like so:
\`\`\`json
{
[queryType]("timeseries"),
[dataSource]("twitterstream"),
[intervals](["2010-01-01/2020-01-01"]),
[granularity]("minute"),
[aggregations]([)
{ [type]() “count”, [name]() “rows”},
{ [type]() “doubleSum”, [fieldName]() “tweets”, [name]() “tweets”}
]
}
\`\`\`
This gives us something like the following:
\`\`\`json
[ {
“timestamp” : “2013-06-10T19:09:00.000Z”,
“result” : {
“tweets” : 2650.0,
“rows” : 2120
}
}, {
“timestamp” : “2013-06-10T19:10:00.000Z”,
“result” : {
“tweets” : 3401.0,
“rows” : 2609
}
}, {
“timestamp” : “2013-06-10T19:11:00.000Z”,
“result” : {
“tweets” : 3472.0,
“rows” : 2610
}
},
\`\`\`
Solving a Problem
-----------------
One of Druid's main powers (see what we did there?) is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top hash tags are, ordered by the number of tweets, where the language is English, over the last few minutes you've been reading this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group our results by dimension value and somehow sort those results… and it turns out we can!
Lets create the file:
group_by_query.body</pre>
and put the following in there:
<pre><code>{
"queryType": "groupBy",
"dataSource": "twitterstream",
"granularity": "all",
"dimensions": ["htags"],
"orderBy": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
"aggregations":[
{ "type": "longSum", "fieldName": "tweets", "name": "tweets"}
],
"filter": {"type": "selector", "dimension": "lang", "value": "en" },
"intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code>
Woah! Our query just got a way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects weve introduced to our query can help define the format of our results and provide an answer to our question.
If you issue the query:
<code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body</code>
You should hopefully see an answer to our question. For my twitter stream, it looks like this:
\`\`\`json
[ {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“tweets” : 2660,
“htags” : “android”
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“tweets” : 1944,
“htags” : “E3”
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“tweets” : 1927,
“htags” : “15SueñosPendientes”
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“tweets” : 1717,
“htags” : “ipad”
}
}, {
“version” : “v1”,
“timestamp” : “2012-10-01T00:00:00.000Z”,
“event” : {
“tweets” : 1515,
“htags” : “IDidntTextYouBackBecause”
}
} ]
\`\`\`
Feel free to tweak other query parameters to answer other questions you may have about the data.
Additional Information
----------------------
This tutorial is merely showcasing a small fraction of what Druid can do. Next, continue on to [Loading Your Data](Loading Your Data.html).
And thus concludes our journey! Hopefully you learned a thing or two about Druid real-time ingestion, querying Druid, and how Druid can be used to solve problems. If you have additional questions, feel free to post in our [google groups page](http://www.groups.google.com/forum/#!forum/druid-development).

View File

@ -0,0 +1,327 @@
---
layout: doc_page
---
Greetings! We see you've taken an interest in Druid. That's awesome! Hopefully this tutorial will help clarify some core Druid concepts. We will go through one of the Real-time "Examples":Examples.html, and issue some basic Druid queries. The data source we'll be working with is the "Twitter spritzer stream":https://dev.twitter.com/docs/streaming-apis/streams/public. If you are ready to explore Druid, brave its challenges, and maybe learn a thing or two, read on!
h2. Setting Up
There are two ways to set up Druid: download a tarball, or build it from source.
h3. Download a Tarball
We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz.
Download this bad boy to a directory of your choosing.
You can extract the awesomeness within by issuing:
pre. tar -zxvf druid-services-0.4.6.tar.gz
Not too lost so far right? That's great! If you cd into the directory:
pre. cd druid-services-0.4.6-SNAPSHOT
You should see a bunch of files:
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories
h3. Clone and Build from Source
The other way to set up Druid is from source via git. To do so, run these commands:
<pre><code>git clone git@github.com:metamx/druid.git
cd druid
git checkout druid-0.4.32-branch
./build.sh
</code></pre>
You should see a bunch of files:
<pre><code>DruidCorporateCLA.pdf README common examples indexer pom.xml server
DruidIndividualCLA.pdf build.sh doc group_by.body install publications services
LICENSE client eclipse_formatting.xml index-common merger realtime
</code></pre>
You can find the example executables in the examples/bin directory:
* run_example_server.sh
* run_example_client.sh
h2. Running Example Scripts
Let's start doing stuff. You can start a Druid "Realtime":Realtime.html node by issuing:
<code>./run_example_server.sh</code>
Select "twitter".
You'll need to register a new application with the twitter API, which only takes a minute. Go to "https://twitter.com/oauth_clients/new":https://twitter.com/oauth_clients/new and fill out the form and submit. Don't worry, the home page and callback url can be anything. This will generate keys for the Twitter example application. Take note of the values for consumer key/secret and access token/secret.
Enter your credentials when prompted.
Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below. If you see crazy exceptions, you probably typed in your login information incorrectly.
<pre><code>2013-05-17 23:04:40,934 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8080
2013-05-17 23:04:40,935 INFO [main] com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler - Invoking start method[public void com.metamx.druid.http.FileRequestLogger.start()] on object[com.metamx.druid.http.FileRequestLogger@42bb0406].
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Connection established.
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] druid.examples.twitter.TwitterSpritzerFirehoseFactory - Connected_to_Twitter
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Receiving status stream.
</code></pre>
Periodically, you'll also see messages of the form:
<pre><code>2013-05-17 23:04:59,793 INFO [chief-twitterstream] druid.examples.twitter.TwitterSpritzerFirehoseFactory - nextRow() has returned 1,000 InputRows
</code></pre>
These messages indicate you are ingesting events. The Druid real-time node ingests events in an in-memory buffer. Periodically, these events will be persisted to disk. Persisting to disk generates a whole bunch of logs:
<pre><code>2013-05-17 23:06:40,918 INFO [chief-twitterstream] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Submitting persist runnable for dataSource[twitterstream]
2013-05-17 23:06:40,920 INFO [twitterstream-incremental-persist] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - DataSource[twitterstream], Interval[2013-05-17T23:00:00.000Z/2013-05-18T00:00:00.000Z], persisting Hydrant[FireHydrant{index=com.metamx.druid.index.v1.IncrementalIndex@126212dd, queryable=com.metamx.druid.index.IncrementalIndexSegment@64c47498, count=0}]
2013-05-17 23:06:40,937 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting persist for interval[2013-05-17T23:00:00.000Z/2013-05-17T23:07:00.000Z], rows[4,666]
2013-05-17 23:06:41,039 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed index.drd in 11 millis.
2013-05-17 23:06:41,070 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed dim conversions in 31 millis.
2013-05-17 23:06:41,275 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.CompressedPools - Allocating new chunkEncoder[1]
2013-05-17 23:06:41,332 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed walk through of 4,666 rows in 262 millis.
2013-05-17 23:06:41,334 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[htags] with cardinality[634]
2013-05-17 23:06:41,381 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[htags] in 49 millis.
2013-05-17 23:06:41,382 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[lang] with cardinality[19]
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[lang] in 17 millis.
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[utc_offset] with cardinality[32]
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[utc_offset] in 15 millis.
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed inverted.drd in 81 millis.
2013-05-17 23:06:41,425 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexIO$DefaultIndexIOHandler - Converting v8[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] to v9[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0]
2013-05-17 23:06:41,426 INFO [twitterstream-incremental-persist]
... ETC
</code></pre>
The logs are about building different columns, probably not the most exciting stuff (they might as well be in Vulcan) if you are learning about Druid for the first time. Nevertheless, if you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our "White Paper":http://static.druid.io/docs/druid.pdf.
Okay, things are about to get real (-time). To query the real-time node you've spun up, you can issue:
<pre>./run_example_client.sh</pre>
Select "twitter" once again. This script issues ["GroupByQuery":GroupByQuery.html]s to the twitter data we've been ingesting. The query looks like this:
<pre><code>{
"queryType": "groupBy",
"dataSource": "twitterstream",
"granularity": "all",
"dimensions": ["lang", "utc_offset"],
"aggregations":[
{ "type": "count", "name": "rows"},
{ "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
],
"filter": { "type": "selector", "dimension": "lang", "value": "en" },
"intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code></pre>
This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the **dimensions** field: ["lang", "utc_offset"]. We are **filtering** via the **"lang"** dimension, to only look at English tweets. Our **aggregations** are what we are calculating: a row count, and the sum of the tweets in our data.
The result looks something like this:
<pre><code>[
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": {
"utc_offset": "-10800",
"tweets": 90,
"lang": "en",
"rows": 81
}
},
{
"version": "v1",
"timestamp": "2012-10-01T00:00:00.000Z",
"event": {
"utc_offset": "-14400",
"tweets": 177,
"lang": "en",
"rows": 154
}
},
...
</code></pre>
This data, plotted in a time series/distribution, looks something like this:
!http://metamarkets.com/wp-content/uploads/2013/06/tweets_timezone_offset.png(Timezone / Tweets Scatter Plot)!
This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have "curl":http://curl.haxx.se/ installed. Control+C to break out of the client script.
h2. Querying Druid
In your favorite editor, create the file:
<pre>time_boundary_query.body</pre>
Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:
<pre><code>{
"queryType" : "timeBoundary",
"dataSource" : "twitterstream"
}
</code></pre>
The "TimeBoundaryQuery":TimeBoundaryQuery.html is one of the simplest Druid queries. To run the query, you can issue:
<pre><code>
curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
</code></pre>
We get something like this JSON back:
<pre><code>[ {
"timestamp" : "2013-06-10T19:09:00.000Z",
"result" : {
"minTime" : "2013-06-10T19:09:00.000Z",
"maxTime" : "2013-06-10T20:50:00.000Z"
}
} ]
</code></pre>
That's the result. What information do you think the result is conveying?
...
If you said the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity), you are absolutely correct. I can see you are a person legitimately interested in learning about Druid. Let's explore a bit further.
Return to your favorite editor and create the file:
<pre>timeseries_query.body</pre>
We are going to make a slightly more complicated query, the "TimeseriesQuery":TimeseriesQuery.html. Copy and paste the following into the file:
<pre><code>{
"queryType":"timeseries",
"dataSource":"twitterstream",
"intervals":["2010-01-01/2020-01-01"],
"granularity":"all",
"aggregations":[
{ "type": "count", "name": "rows"},
{ "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
]
}
</code></pre>
You are probably wondering, what are these "Granularities":Granularities.html and "Aggregations":Aggregations.html things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:
<pre><code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body</code></pre>
Once again, you should get a JSON blob of text back with your results, that looks something like this:
<pre><code>[ {
"timestamp" : "2013-06-10T19:09:00.000Z",
"result" : {
"tweets" : 358562.0,
"rows" : 272271
}
} ]
</code></pre>
If you issue the query again, you should notice your results updating.
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
If you loudly exclaimed "we can change granularity to minute", you are absolutely correct again! We can specify different granularities to bucket our results, like so:
<pre><code>{
"queryType":"timeseries",
"dataSource":"twitterstream",
"intervals":["2010-01-01/2020-01-01"],
"granularity":"minute",
"aggregations":[
{ "type": "count", "name": "rows"},
{ "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
]
}
</code></pre>
This gives us something like the following:
<pre><code>[ {
"timestamp" : "2013-06-10T19:09:00.000Z",
"result" : {
"tweets" : 2650.0,
"rows" : 2120
}
}, {
"timestamp" : "2013-06-10T19:10:00.000Z",
"result" : {
"tweets" : 3401.0,
"rows" : 2609
}
}, {
"timestamp" : "2013-06-10T19:11:00.000Z",
"result" : {
"tweets" : 3472.0,
"rows" : 2610
}
},
...
</code></pre>
h2. Solving a Problem
One of Druid's main powers (see what we did there?) is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top hash tags are, ordered by the number of tweets, where the language is English, over the last few minutes you've been reading this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the "GroupByQuery":GroupByQuery.html. It would be nice if we could group results by dimension value and somehow sort those results... and it turns out we can!
Let's create the file:
<pre>group_by_query.body</pre>
and put the following in there:
<pre><code>{
"queryType": "groupBy",
"dataSource": "twitterstream",
"granularity": "all",
"dimensions": ["htags"],
"orderBy": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
"aggregations":[
{ "type": "longSum", "fieldName": "tweets", "name": "tweets"}
],
"filter": {"type": "selector", "dimension": "lang", "value": "en" },
"intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code></pre>
Whoa! Our query just got way more complicated. Now we have these "Filters":Filters.html things and this "OrderBy":OrderBy.html thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
If you issue the query:
<pre><code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body</code></pre>
You should hopefully see an answer to our question. For my twitter stream, it looks like this:
<pre><code>[ {
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : {
"tweets" : 2660,
"htags" : "android"
}
}, {
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : {
"tweets" : 1944,
"htags" : "E3"
}
}, {
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : {
"tweets" : 1927,
"htags" : "15SueñosPendientes"
}
}, {
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : {
"tweets" : 1717,
"htags" : "ipad"
}
}, {
"version" : "v1",
"timestamp" : "2012-10-01T00:00:00.000Z",
"event" : {
"tweets" : 1515,
"htags" : "IDidntTextYouBackBecause"
}
} ]
</code></pre>
Feel free to tweak other query parameters to answer other questions you may have about the data.
h2. Additional Information
This tutorial is merely showcasing a small fraction of what Druid can do. Next, continue on to "Loading Your Data":./Loading-Your-Data.html.
And thus concludes our journey! Hopefully you learned a thing or two about Druid real-time ingestion, querying Druid, and how Druid can be used to solve problems. If you have additional questions, feel free to post in our "google groups page":http://www.groups.google.com/forum/#!forum/druid-development.

View File

@ -1,5 +1,5 @@
---
layout: default
layout: doc_page
---
This page discusses how we do versioning and provides information on our stable releases.
@ -8,13 +8,13 @@ Versioning Strategy
We generally follow [semantic versioning](http://semver.org/). The general idea is
- “Major” version (leftmost): backwards incompatible, no guarantees exist about APIs between the versions
- “Minor” version (middle number): you can move forward from a smaller number to a larger number, but moving backwards *might* be incompatible.
- “bug-fix” version (“patch” or the rightmost): Interchangeable. The higher the number, the more things are fixed (hopefully), but the programming interfaces are completely compatible and you should be able to just drop in a new jar and have it work.
* "Major" version (leftmost): backwards incompatible, no guarantees exist about APIs between the versions
* "Minor" version (middle number): you can move forward from a smaller number to a larger number, but moving backwards *might* be incompatible.
* "bug-fix" version ("patch" or the rightmost): Interchangeable. The higher the number, the more things are fixed (hopefully), but the programming interfaces are completely compatible and you should be able to just drop in a new jar and have it work.
Note that this is defined in terms of programming API, **not** in terms of functionality. It is possible that a brand new awesome way of doing something is introduced in a “bug-fix” release version if it doesnt add to the public API or change it.
Note that this is defined in terms of programming API, **not** in terms of functionality. It is possible that a brand new awesome way of doing something is introduced in a "bug-fix" release version if it doesn't add to the public API or change it.
One exception for right now, while we are still in major version 0, we are considering the APIs to be in beta and are conflating “major” and “minor” so a minor version increase could be backwards incompatible for as long as we are at major version 0. These will be communicated via email on the group.
One exception for right now, while we are still in major version 0, we are considering the APIs to be in beta and are conflating "major" and "minor" so a minor version increase could be backwards incompatible for as long as we are at major version 0. These will be communicated via email on the group.
For external deployments, we recommend running the stable release tag. Releases are considered stable after we have deployed them into our production environment and they have operated bug-free for some time.

View File

@ -1,10 +1,10 @@
---
layout: default
layout: doc_page
---
Druid uses ZooKeeper (ZK) for management of current cluster state. The operations that happen over ZK are
1. [Master](Master.html) leader election
2. Segment “publishing” protocol from [Compute](Compute.html) and [Realtime](Realtime.html)
2. Segment "publishing" protocol from [Compute](Compute.html) and [Realtime](Realtime.html)
3. Segment load/drop protocol between [Master](Master.html) and [Compute](Compute.html)
### Property Configuration
@ -13,45 +13,59 @@ ZooKeeper paths are set via the `runtime.properties` configuration file. Druid w
There is a prefix path that is required and can be used as the only (well, kinda, see the note below) path-related zookeeper configuration parameter (everything else will be a default based on the prefix):
druid.zk.paths.base
```
druid.zk.paths.base
```
You can also override each individual path (defaults are shown below):
druid.zk.paths.propertiesPath=${druid.zk.paths.base}/properties
druid.zk.paths.announcementsPath=${druid.zk.paths.base}/announcements
druid.zk.paths.servedSegmentsPath=${druid.zk.paths.base}/servedSegments
druid.zk.paths.loadQueuePath=${druid.zk.paths.base}/loadQueue
druid.zk.paths.masterPath=${druid.zk.paths.base}/master
druid.zk.paths.indexer.announcementsPath=${druid.zk.paths.base}/indexer/announcements
druid.zk.paths.indexer.tasksPath=${druid.zk.paths.base}/indexer/tasks
druid.zk.paths.indexer.statusPath=${druid.zk.paths.base}/indexer/status
druid.zk.paths.indexer.leaderLatchPath=${druid.zk.paths.base}/indexer/leaderLatchPath
```
druid.zk.paths.propertiesPath=${druid.zk.paths.base}/properties
druid.zk.paths.announcementsPath=${druid.zk.paths.base}/announcements
druid.zk.paths.servedSegmentsPath=${druid.zk.paths.base}/servedSegments
druid.zk.paths.loadQueuePath=${druid.zk.paths.base}/loadQueue
druid.zk.paths.masterPath=${druid.zk.paths.base}/master
druid.zk.paths.indexer.announcementsPath=${druid.zk.paths.base}/indexer/announcements
druid.zk.paths.indexer.tasksPath=${druid.zk.paths.base}/indexer/tasks
druid.zk.paths.indexer.statusPath=${druid.zk.paths.base}/indexer/status
druid.zk.paths.indexer.leaderLatchPath=${druid.zk.paths.base}/indexer/leaderLatchPath
```
NOTE: We also use Curator's service discovery module to expose some services via zookeeper. This also uses a zookeeper path, but this path is **not** affected by `druid.zk.paths.base` and **must** be specified separately. This property is
druid.zk.paths.discoveryPath
```
druid.zk.paths.discoveryPath
```
### Master Leader Election
We use the Curator LeadershipLatch recipe to do leader election at path
${druid.zk.paths.masterPath}/_MASTER
```
${druid.zk.paths.masterPath}/_MASTER
```
### Segment “publishing” protocol from Compute and Realtime
### Segment "publishing" protocol from Compute and Realtime
The `announcementsPath` and `servedSegmentsPath` are used for this.
All [Compute](Compute.html) and [Realtime](Realtime.html) nodes publish themselves on the `announcementsPath`, specifically, they will create an ephemeral znode at
${druid.zk.paths.announcementsPath}/${druid.host}
```
${druid.zk.paths.announcementsPath}/${druid.host}
```
Which signifies that they exist. They will also subsequently create a permanent znode at
${druid.zk.paths.servedSegmentsPath}/${druid.host}
```
${druid.zk.paths.servedSegmentsPath}/${druid.host}
```
And as they load up segments, they will attach ephemeral znodes that look like
${druid.zk.paths.servedSegmentsPath}/${druid.host}/_segment_identifier_
```
${druid.zk.paths.servedSegmentsPath}/${druid.host}/_segment_identifier_
```
Nodes like the [Master](Master.html) and [Broker](Broker.html) can then watch these paths to see which nodes are currently serving which segments.
@ -61,6 +75,8 @@ The `loadQueuePath` is used for this.
When the [Master](Master.html) decides that a [Compute](Compute.html) node should load or drop a segment, it writes an ephemeral znode to
${druid.zk.paths.loadQueuePath}/_host_of_compute_node/_segment_identifier
```
${druid.zk.paths.loadQueuePath}/_host_of_compute_node/_segment_identifier
```
This node will contain a payload that indicates to the Compute node what it should do with the given segment. When the Compute node is done with the work, it will delete the znode in order to signify to the Master that it is complete.

View File

@ -1,71 +0,0 @@
---
layout: default
---
Contents
\* [Introduction|Home](Introduction|Home.html)
\* [Download](Download.html)
\* [Support](Support.html)
\* [Contribute](Contribute.html)
========================
Getting Started
\* [Tutorial: A First Look at Druid](Tutorial:-A-First-Look-at-Druid.html)
\* [Tutorial: The Druid Cluster](Tutorial:-The-Druid-Cluster.html)
\* [Loading Your Data](Loading-Your-Data.html)
\* [Querying Your Data](Querying-Your-Data.html)
\* [Booting a Production Cluster](Booting-a-Production-Cluster.html)
\* [Examples](Examples.html)
\* [Cluster Setup](Cluster-Setup.html)
\* [Configuration](Configuration.html)
--------------------------------------
Data Ingestion
\* [Realtime](Realtime.html)
\* [Batch|Batch Ingestion](Batch|Batch-Ingestion.html)
\* [Indexing Service](Indexing-Service.html)
----------------------------
Querying
\* [Querying](Querying.html)
**\* ]
**\* [Aggregations](Aggregations.html)
**\* ]
**\* [Granularities](Granularities.html)
\* Query Types
**\* ]
****\* ]
****\* ]
**\* [SearchQuery](SearchQuery.html)
**\* ]
** [SegmentMetadataQuery](SegmentMetadataQuery.html)
**\* ]
**\* [TimeseriesQuery](TimeseriesQuery.html)
---------------------------
Architecture
\* [Design](Design.html)
\* [Segments](Segments.html)
\* Node Types
**\* ]
**\* [Broker](Broker.html)
**\* ]
****\* ]
**\* [Realtime](Realtime.html)
**\* ]
**\* [Plumber](Plumber.html)
\* External Dependencies
**\* ]
**\* [MySQL](MySQL.html)
**\* ]
** [Concepts and Terminology](Concepts-and-Terminology.html)
-------------------------------
Development
\* [Versioning](Versioning.html)
\* [Build From Source](Build-From-Source.html)
\* [Libraries](Libraries.html)
------------------------
Misc
\* [Thanks](Thanks.html)
-------------

View File

@ -0,0 +1,70 @@
---
---
* [Introduction|Home](Introduction|Home.html)
* [Download](Download.html)
* [Support](Support.html)
* [Contribute](Contribute.html)
========================
Getting Started
* [Tutorial: A First Look at Druid](Tutorial:-A-First-Look-at-Druid.html)
* [Tutorial: The Druid Cluster](Tutorial:-The-Druid-Cluster.html)
* [Loading Your Data](Loading-Your-Data.html)
* [Querying Your Data](Querying-Your-Data.html)
* [Booting a Production Cluster](Booting-a-Production-Cluster.html)
* [Examples](Examples.html)
* [Cluster Setup](Cluster-Setup.html)
* [Configuration](Configuration.html)
--------------------------------------
Data Ingestion
* [Realtime](Realtime.html)
* [Batch|Batch Ingestion](Batch|Batch-Ingestion.html)
* [Indexing Service](Indexing-Service.html)
----------------------------
Querying
* [Querying](Querying.html)
*** ]
*** [Aggregations](Aggregations.html)
*** ]
*** [Granularities](Granularities.html)
* Query Types
*** ]
***** ]
***** ]
*** [SearchQuery](SearchQuery.html)
*** ]
** [SegmentMetadataQuery](SegmentMetadataQuery.html)
*** ]
*** [TimeseriesQuery](TimeseriesQuery.html)
---------------------------
Architecture
* [Design](Design.html)
* [Segments](Segments.html)
* Node Types
*** ]
*** [Broker](Broker.html)
*** ]
***** ]
*** [Realtime](Realtime.html)
*** ]
*** [Plumber](Plumber.html)
* External Dependencies
*** ]
*** [MySQL](MySQL.html)
*** ]
** [Concepts and Terminology](Concepts-and-Terminology.html)
-------------------------------
Development
* [Versioning](Versioning.html)
* [Build From Source](Build-From-Source.html)
* [Libraries](Libraries.html)
------------------------
Misc
* [Thanks](Thanks.html)
-------------

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -1,592 +0,0 @@
@font-face {
font-family: 'Conv_framd';
src: url('../fonts/framd.eot');
src: url('../fonts/framd.eot?#iefix') format('embedded-opentype'),
url('../fonts/framd.woff') format('woff'),
url('../fonts/framd.ttf') format('truetype'),
url('../fonts/framd.svg#heroregular') format('svg');
font-weight: normal;
font-style: normal;
}
/* Root elements: fill the viewport height, reset spacing, and set the
   default text color and font. Long words are allowed to wrap. */
html, body {
position:relative;
height:100%;
min-height:100%;
color:#252525;
font:400 18px/26px 'Open Sans', Arial, Helvetica, sans-serif;
padding:0;
margin:0;
word-wrap:break-word;
}
a {
color:#6ab6dd;
position:relative;
}
a:hover {
text-decoration:underline;
color:#2c79a1;
}
.wrapper {
min-height:100%;
}
header {
margin:0 0 100px;
}
header .span12 {
padding:0 0 7px 0;
}
.logo.custom {
display:inline-block;
margin:0;
vertical-align:25px;
}
.logo.custom a {
background:url(../img/logo.png) no-repeat;
width: 110px;
height: 49px;
display:block;
text-indent:-9999px;
}
.custom.navbar {
margin:10px 0;
}
.custom.navbar .nav li {
padding:0 !important;
}
.custom.navbar .nav li a, .navbar .brand, .custom.navbar .nav li.pull-right span {
font:300 14px/20px 'Open Sans', Arial, Helvetica, sans-serif;
padding:10px 10px;
}
.navbar .brand.active {
color:#fff;
}
.custom.navbar .nav li.pull-right span {
padding-right:0;
color:#e76d4c !important;
display:block;
}
.custom.navbar .nav li a.doc-link {
padding:5px 10px 0;
}
.custom.navbar .nav li a.doc-link span {
display:inline-block;
background:url(../img/icon-git.png) no-repeat;
width: 28px;
height: 28px;
vertical-align:-7px;
margin-left:5px;
}
.custom.navbar .nav li a.doc-link:hover span, .custom.navbar .nav li.active a.doc-link span {
background-position:0 -28px;
}
.custom.navbar .nav {
float:none;
}
.navbar .nav > li.pull-right {
float:right;
padding:10px 0;
}
h1 {
font:300 48px/48px 'Open Sans', Arial, Helvetica, sans-serif;
margin:0 0 45px;
}
h1.index {
text-align:center;
}
h1 span {
display:block;
font:400 14px/28px 'Open Sans', Arial, Helvetica, sans-serif;
}
h2 {
font:30px/30px 'Conv_framd', Arial, Helvetica, sans-serif;
margin:0 0 20px;
color:#0f1e35;
}
h3 {
font:300 30px/36px 'Open Sans', Arial, Helvetica, sans-serif;
margin:0 0 33px;
text-align:center;
}
.btn {
display:block;
margin:0 auto 65px;
background:#6ab6dd;
border:none;
box-shadow:inset -3px -3px 3px #5592b1;
height:53px;
width:205px;
font:30px/53px 'Conv_framd', Arial, Helvetica, sans-serif;
color:#252424;
text-shadow:none;
padding:0;
z-index:100;
position:relative;
}
.btn a {
color:#252424;
}
.btn:hover {
background:#83c6e9;
}
.index-content {
margin:0 auto 60px;
text-align:center;
}
.third {
margin:0 auto 35px;
}
.third-item {
text-align:left;
}
.third-item:first-child {
margin:0;
}
.third-item a {
display:block;
font-family:'Open Sans', Arial, Helvetica, sans-serif;
font-weight:700;
font-size:30px;
margin:0 auto 20px;
color:#252424;
text-align:center;
}
.container.custom {
padding:0;
margin:0 auto;
}
.container.custom.main-cont {
padding-bottom:230px;
}
.text-part {
padding-top:70px;
}
.row-fluid.index-page {
padding-top:100px;
}
.index-page .content {
padding:15px 0 0;
}
.index-page h3 {
text-align:left;
}
.index-page .sidebar {
padding:65px 0 30px;
}
.container.custom p {
margin:0 0 17px;
}
.homepage .index-page .content h2 {
margin:0 0 20px;
}
.container.custom .unstyled {
margin:0;
color:#353535;
}
.container.custom .unstyled li {
margin:0 0 17px;
}
.container.custom .unstyled li span {
font-family:'Open Sans', Arial, Helvetica, sans-serif;
font-weight:700;
display:block;
}
.container.custom .unstyled li a {
display:inline;
}
.homepage h4 {
font:24px/24px 'Conv_framd', Arial, Helvetica, sans-serif;
margin:0 0 15px;
}
.container.custom .sidebar .unstyled {
margin:0 0 100px;
}
.container.custom .sidebar .unstyled li {
margin:0 0 12px;
border-bottom:1px solid #adadad;
padding: 5px 7px;
}
.grey-box {
background:#e5e4e3;
border-radius:3px;
-moz-border-radius:3px;
-webkit-border-radius:3px;
position:relative;
padding:20px 10px 130px;
color:#000;
}
footer {
text-align:center;
font-size:14px;
color:#000;
margin:-135px 0 0;
}
footer .container.custom {
border-top:1px solid #e1e1e1;
padding:20px 0 25px;
}
footer .span9 {
text-align:left;
}
footer .container.custom ul.unstyled {
display:inline-block;
margin:0 120px 30px 30px;
text-align:left;
vertical-align:top;
}
footer .container.custom ul.unstyled li {
font:300 14px/26px 'Open Sans', Arial, Helvetica, sans-serif;
margin:0;
}
footer .container.custom .unstyled li a {
color:#000;
font-weight:300;
}
footer .container.custom .unstyled li:first-child a {
font-weight:400;
}
footer ul li a:hover {
text-decoration:underline;
color:#fff;
}
footer .logo-block {
text-align:right;
}
footer .container.custom p {
display:inline-block;
margin:28px 0 0 10px;
text-align:left;
}
.contact-item {
margin:0 0 30px 30px;
text-align:left;
font-weight:300;
}
.contact-item a {
color:#000;
}
footer .contact-item span {
font-weight:400;
display:block;
}
footer .contact-item:first-child span {
text-transform:uppercase;
}
footer .span4 {
text-align:left;
}
footer .span5 {
padding-top: 75px;
}
.soc {
text-align:left;
margin:5px 0 0 0;
}
.soc a {
display:inline-block;
width:35px;
height:34px;
background:url(../img/icons-soc.png) no-repeat;
}
.soc a.github {
background-position:0 -34px;
}
.soc a.meet {
background-position:0 -68px;
}
.soc a.rss {
background-position:0 -102px;
}
.text-item {
margin:0 0 75px;
}
.container.custom p.note {
text-align:center;
padding:30px 0 0;
}
/* Emphasised text inside a text item: bold Open Sans.
   (A leading font-weight:normal was removed; the 700 below always won.) */
.text-item strong {
font-family:'Open Sans', Arial, Helvetica, sans-serif;
font-weight:700;
}
h2.date {
font-family:'Open Sans', Arial, Helvetica, sans-serif;
font-weight:400;
}
.blog h2.date {
margin:0 0 25px;
}
h2.date span {
display:block;
margin:0 0 5px;
padding:0 0 15px;
font-size:20px;
border-bottom:1px solid #ccc;
}
.blog h2.date a {
font-weight:700;
}
.blog.inner h2.date span:first-child {
display:block;
font-size:30px;
font-weight:700;
padding:0;
border:none;
}
.blog.inner h3 {
text-align:left;
font-size:25px;
font-weight:700;
margin:0 0 15px;
}
.blog.inner ul li {
margin-left: 50px;
line-height:26px;
}
.recent h3 {
font-size: 25px;
font-weight: 700;
margin: 0 0 15px;
text-align: left;
}
.recent ul li.active a {
color:#252525;
}
/* Short centered horizontal divider drawn with a top border.
   Only the effective border color is kept; an earlier #dfdfdf declaration
   was always overridden by this one. */
.border {
width:130px;
margin: 45px auto;
border-top:1px solid #81807f;
}
.text-img {
display:block;
margin:0 auto 17px;
}
.indent p, .indent ul {
padding:0 0 0 50px;
}
.span3 {
margin-left:0;
}
.nav.nav-list.bs-docs-sidenav {
border:1px solid #e5e5e5;
border-radius:5px;
box-shadow:0 0 3px #f9f9f9;
padding:0;
width:auto;
}
.nav.nav-list.bs-docs-sidenav li {
border-top:1px solid #e5e5e5;
}
.nav.nav-list.bs-docs-sidenav li:first-child {
border:none;
}
.nav.nav-list.bs-docs-sidenav li:first-child a {
border-radius:5px 5px 0 0;
-moz-border-radius:5px 5px 0 0;
-webkit-border-radius:5px 5px 0 0;
}
.nav.nav-list.bs-docs-sidenav li:last-child, .nav.nav-list.bs-docs-sidenav li:last-child a {
border-radius:0 0 5px 5px;
-moz-border-radius:0 0 5px 5px;
-webkit-border-radius:0 0 5px 5px;
}
.nav.nav-list.bs-docs-sidenav li a {
padding:10px;
margin:0;
font-weight:400;
font-size:14px;
line-height:18px;
}
.icon-chevron-right {
float: right;
margin-right: -6px;
margin-top: 2px;
opacity: 0.25;
}
.indent ul li {
line-height:26px;
}
.span8 h3 {
text-align:left;
margin:0 0 50px;
}
.span8 h3 a {
font-weight:800;
}
.span8 h4 {
font:700 18px/26px 'Open Sans', Arial, Helvetica, sans-serif;
margin:0 0 20px;
}
.span8 p span {
font-weight:700;
}
header.index-head {
background:#f9f9f9;
margin:0 0 30px;
}
header.index-head .span12 {
margin-bottom:80px;
}
.index-content h2 {
text-align:center;
}
.third-item img {
display:block;
margin:0 auto 70px;
}
.container.custom .third-item p {
margin:0 0 0 20px;
}
.row-fluid {
margin:0;
padding:0;
}
.nav-list [class^="icon-"] {
margin-right:-2px;
}
/* Wide viewports (1200px and up): roomier navbar items, a fixed 258px docs
   sidebar, and extra right margin on the "recent" list. */
@media (min-width: 1200px) {
.custom.navbar .nav li a, .navbar .brand, .custom.navbar .nav li.pull-right span {
padding: 10px 20px;
font-size:16px;
}
.nav.nav-list.bs-docs-sidenav {
width:258px;
}
.container.custom .recent ul.unstyled {
margin-right:100px;
}
}
/* Viewports up to 980px: the page container becomes fluid and the docs
   sidebar is laid out in normal flow at full width. */
@media (max-width: 980px) {
.container.custom {
width:95%;
}
/* Put the affixed sidebar back into normal flow (position: static). */
.bs-docs-sidenav.affix {
position: static;
top: 0;
width: 100%;
}
.nav.nav-list.bs-docs-sidenav {
width:100%;
margin-bottom:20px;
}
}
/* Screens whose device width is 710px-770px: 700px page container, 22px
   navbar items, and a fixed-position affixed docs sidebar. */
@media only screen
and (min-device-width : 710px)
and (max-device-width : 770px) {
.container.custom {
width:700px;
position:relative;
}
.custom.navbar .nav li {
font-size:22px;
padding:0 10px;
}
/* Pin the affixed sidebar 175px from the top at a fixed 218px width. */
.nav.nav-list.bs-docs-sidenav.affix {
position:fixed;
top:175px;
width:218px;
}
}
/* Screens whose device width is 770px-810px: 700px page container, 15px
   navbar items, and a 30px left offset on the nav list.
   NOTE(review): a device width of exactly 770px matches both this query and
   the 710px-770px query; where the two conflict, the rules in this later
   block win. */
@media only screen and (min-device-width : 770px)
and (max-device-width : 810px) {
.container.custom {
width:700px;
}
.custom.navbar .nav li {
font-size:15px;
padding:0 15px;
}
.custom.navbar .nav {
margin-left:30px;
}
}
/* Screens whose device width is 320px-480px: full-width container with no
   margin, offsets and indents removed, and 20px of horizontal padding on
   the main content sections and headings. */
@media only screen
and (min-device-width : 320px)
and (max-device-width : 480px) {
.container.custom {
width:100%;
margin:0;
}
footer .logo-block {
text-align: left;
padding-left:30px;
}
/* Drop the grid offset. */
.offset1, .row-fluid .offset1:first-child {
margin:0;
}
/* Remove the 50px left indent applied to .indent content elsewhere in this file. */
.indent p, .indent ul {
padding:0;
}
.indent, .blog, .recent h3, .recent ul, .text-part {
padding:0 20px;
}
.index-head h3 {
font-size:20px;
}
.index-content {
padding:0 20px;
}
.content h2, .content h3, .content ul.unstyled, .sidebar h3 {
padding:0 20px;
}
h1 {
padding:0 20px;
}
}
/* Viewports up to 320px wide. Largely repeats the 320px-480px device-width
   rules; the differences are the .index-head h3 sizing and the extra top
   margin on footer paragraphs. */
@media (max-width: 320px) {
.container.custom {
width:100%;
}
footer .logo-block {
text-align: left;
padding-left:30px;
}
footer .container.custom p {
margin-top:10px;
}
/* Drop the grid offset. */
.offset1, .row-fluid .offset1:first-child {
margin:0;
}
/* Remove the 50px left indent applied to .indent content elsewhere in this file. */
.indent p, .indent ul {
padding:0;
}
.indent, .blog, .recent h3, .recent ul, .text-part {
padding:0 20px;
}
.index-head h3 {
font-size:25px;
line-height:30px;
padding:0 20px;
}
.index-content {
padding:0 20px;
}
.content h2, .content h3, .content ul.unstyled, .sidebar h3 {
padding:0 20px;
}
h1 {
padding:0 20px;
}
}
.container.custom .faq-page p {
margin-bottom:10px;
}
.index-head h3 {
margin-bottom:50px;
}
h1.center {
text-align:center;
}
.btn.btn-navbar {
height:auto;
width:auto;
margin:10px 0 0;
}
.navbar-inner {
z-index:100000;
position:relative;
}

View File

@ -1,147 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Druid | {{page.title}}</title>
<!-- Third-party assets (Google Fonts, jQuery) are loaded over https so they are not blocked as mixed content when the site itself is served over https. -->
<link rel="stylesheet" type="text/css" href="/css/bootstrap.css" media="all" />
<link rel="stylesheet" type="text/css" href="/css/bootstrap-responsive.css" media="all" />
<link rel="stylesheet" type="text/css" href="/css/syntax.css" media="all" />
<link href='https://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
<!-- jQuery must load before bootstrap.min.js, which depends on it. -->
<script src="https://code.jquery.com/jquery.js"></script>
<script src="/js/bootstrap.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="wrapper">
<header{% if page.id == 'home' %} class="index-head"{% endif %}>
<div class="container custom">
<div class="row-fluid">
<div class="span12">
<div class="navbar navbar-inverse custom">
<div class="navbar-inner">
<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
<div class="nav-collapse collapse">
<ul class="nav">
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
<a href="/druid.html">What is Druid?</a>
</li>
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
<a href="/downloads.html">Downloads</a>
</li>
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
</li>
<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
<a href="/community.html">Community</a>
</li>
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
<a href="/faq.html">FAQ</a>
</li>
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
<a href="/blog">Blog</a>
</li>
<li class="pull-right">
<span>BETA</span>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
{% if page.id == 'home' %}
{% comment %}
Home-page hero: tagline plus the download call-to-action.
The link itself carries the .btn class: an <a> nested inside a <button> is
invalid HTML (a button may not contain interactive content). The href is
root-relative, like the other site links in this layout.
NOTE(review): any stylesheet rule that targeted the old "button.btn a"
structure needs to be pointed at "a.btn" instead; confirm against custom.css.
{% endcomment %}
<h3>Druid is open-source infrastructure for real&sup2;time exploratory analytics on large datasets.</h3>
<a class="btn" href="/downloads.html">Download</a>
{% endif %}
</div>
</header>
<div class="container custom main-cont">
{{ content }}
</div>
</div>
<footer>
<div class="container custom">
<div class="row-fluid">
<div class="span3">
<div class="contact-item">
<span>CONTACT US</span>
<a href="mailto:info@druid.io">info@druid.io</a>
</div>
<div class="contact-item">
<span>Metamarkets</span>
625 2nd Street, Suite #230<br/>
San Francisco, CA 94107
<div class="soc">
<a href="https://twitter.com/druidio"></a>
<a href="https://github.com/metamx/druid" class="github"></a>
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
</div>
</div>
</div>
<div class="span9">
<ul class="unstyled">
<li>
<a href="/">DRUID</a>
</li>
<li>
<a href="/druid.html">What is Druid?</a>
</li>
<li>
<a href="/downloads.html">Downloads</a>
</li>
<li>
<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/community.html">SUPPORT</a>
</li>
<li>
<a href="/community.html">Community</a>
</li>
<li>
<a href="/faq.html">FAQ</a>
</li>
<li>
<a href="/licensing.html">Licensing</a>
</li>
</ul>
<ul class="unstyled">
<li>
<a href="/blog">BLOG</a>
</li>
</ul>
<div class="logo-block">
<span class="logo custom">
<a href="/"></a>
</span>
<p>is an open source project sponsored by<br/> Metamarkets.</p>
</div>
</div>
</div>
</div>
</footer>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-40280432-1");
pageTracker._trackPageview();
} catch(err) {}
</script>
</body>
</html>

56
docs/content/css/docs.css Normal file
View File

@ -0,0 +1,56 @@
/* Styles for rendered documentation pages. Every rule is scoped under
   .doc-content. */

/* Code listings keep their whitespace and scroll instead of wrapping. */
.doc-content pre,
.doc-content pre code {
  white-space: pre;
  word-wrap: normal;
  overflow: auto;
}

.doc-content p {
  margin: 18px 0;
}

/* HACK: hard-coded cap on image width. Images were overflowing the content
   container for an unknown reason, so they are limited to a fixed pixel
   width. */
.doc-content img {
  max-width: 847.5px;
}

/* Inline code gets a grey background; code inside a <pre> listing or a
   table keeps the background of its parent. */
.doc-content code {
  background-color: #e0e0e0;
}

.doc-content pre code,
.doc-content table code {
  background-color: transparent;
}

/* Vertical breathing room around tables and code listings. */
.doc-content pre,
.doc-content table {
  margin: 35px 0;
}

/* Bordered table: a 1px grid on the table and on every header/data cell. */
.doc-content table,
.doc-content table > thead > tr > th,
.doc-content table > thead > tr > td,
.doc-content table > tbody > tr > th,
.doc-content table > tbody > tr > td,
.doc-content table > tfoot > tr > th,
.doc-content table > tfoot > tr > td {
  border: 1px solid #ddd;
}

/* Heavier rule under the header row. */
.doc-content table > thead > tr > th,
.doc-content table > thead > tr > td {
  border-bottom-width: 2px;
}

/* Zebra striping on odd body rows. */
.doc-content table > tbody > tr:nth-child(odd) > th,
.doc-content table > tbody > tr:nth-child(odd) > td {
  background-color: #f9f9f9;
}

/* Row highlight on hover. Must stay after the striping rule: both have the
   same specificity, so the later one wins. */
.doc-content table > tbody > tr:hover > th,
.doc-content table > tbody > tr:hover > td {
  background-color: #d5d5d5;
}

View File

@ -1,8 +0,0 @@
---
layout: default
---
<div class="row-fluid">
{{ content }}
</div>

View File

@ -1,160 +0,0 @@
/*****************************************************************************/
/*
/* Common
/*
/*****************************************************************************/
/* Global Reset */
* {
margin: 0;
padding: 0;
}
html, body { height: 100%; }
body {
background-color: #FFF;
font: 13.34px Helvetica, Arial, sans-serif;
font-size: small;
text-align: center;
}
h1, h2, h3, h4, h5, h6 {
font-size: 100%; }
h1 { margin-bottom: 1em; }
p { margin: 1em 0; }
/* Link colours, in link -> visited -> hover order. The three rules differ
   only by pseudo-class, so among the pseudo-class rules the later one wins;
   :hover must come after :visited or visited links never show the hover colour. */
a { color: #00a; }
a:visited { color: #a0a; }
a:hover { color: #000; }
/*****************************************************************************/
/*
/* Home
/*
/*****************************************************************************/
ul.posts {
list-style-type: none;
margin-bottom: 2em;
}
ul.posts li {
line-height: 1.75em;
}
ul.posts span {
color: #aaa;
font-family: Monaco, "Courier New", monospace;
font-size: 80%;
}
/*****************************************************************************/
/*
/* Site
/*
/*****************************************************************************/
.site {
font-size: 115%;
text-align: justify;
width: 42em;
margin: 3em auto 2em;
line-height: 1.5em;
}
.site .header a {
font-weight: bold;
text-decoration: none;
}
.site .header h1.title {
display: inline-block;
margin-bottom: 2em;
}
.site .header h1.title a {
color: #a00;
}
.site .header h1.title a:hover {
color: #000;
}
.site .header a.extra {
color: #aaa;
margin-left: 1em;
}
.site .header a.extra:hover {
color: #000;
}
.site .meta {
color: #aaa;
}
.site .footer {
font-size: 80%;
color: #666;
border-top: 4px solid #eee;
margin-top: 2em;
overflow: hidden;
}
.site .footer .contact {
float: left;
margin-right: 3em;
}
.site .footer .contact a {
color: #8085C1;
}
.site .footer .rss {
margin-top: 1.1em;
margin-right: -.2em;
float: right;
}
.site .footer .rss img {
border: 0;
}
/*****************************************************************************/
/*
/* Posts
/*
/*****************************************************************************/
/* standard */
.post pre {
border: 1px solid #ddd;
background-color: #eef;
padding: 0 .4em;
}
.post ul, .post ol {
margin-left: 1.35em;
}
.post code {
border: 1px solid #ddd;
background-color: #eef;
padding: 0 .2em;
}
.post pre code {
border: none;
}
/* terminal */
.post pre.terminal {
border: 1px solid #000;
background-color: #333;
color: #FFF;
}
.post pre.terminal code {
background-color: #333;
}

View File

@ -1,11 +0,0 @@
---
layout: default
---
<div class="row-fluid">
<div class="span10 offset1{% if page.id != 'home' %} simple-page{% endif %}{% if page.sectionid == 'faq' %} faq-page{% endif %}">
{{ content }}
</div>
</div>

View File

@ -1,96 +0,0 @@
<!--
PIE: CSS3 rendering for IE
Version 1.0.0
http://css3pie.com
Dual-licensed for use under the Apache License Version 2.0 or the General Public License (GPL) Version 2.
-->
<PUBLIC:COMPONENT lightWeight="true">
<!-- saved from url=(0014)about:internet -->
<PUBLIC:ATTACH EVENT="oncontentready" FOR="element" ONEVENT="init()" />
<PUBLIC:ATTACH EVENT="ondocumentready" FOR="element" ONEVENT="init()" />
<PUBLIC:ATTACH EVENT="ondetach" FOR="element" ONEVENT="cleanup()" />
<script type="text/javascript">
var doc = element.document;var f=window.PIE;
if(!f){f=window.PIE={F:"-pie-",nb:"Pie",La:"pie_",Ac:{TD:1,TH:1},cc:{TABLE:1,THEAD:1,TBODY:1,TFOOT:1,TR:1,INPUT:1,TEXTAREA:1,SELECT:1,OPTION:1,IMG:1,HR:1},fc:{A:1,INPUT:1,TEXTAREA:1,SELECT:1,BUTTON:1},Gd:{submit:1,button:1,reset:1},aa:function(){}};try{doc.execCommand("BackgroundImageCache",false,true)}catch(aa){}for(var ba=4,Z=doc.createElement("div"),ca=Z.getElementsByTagName("i"),ga;Z.innerHTML="<!--[if gt IE "+ ++ba+"]><i></i><![endif]--\>",ca[0];);f.O=ba;if(ba===6)f.F=f.F.replace(/^-/,"");f.ja=
doc.documentMode||f.O;Z.innerHTML='<v:shape adj="1"/>';ga=Z.firstChild;ga.style.behavior="url(#default#VML)";f.zc=typeof ga.adj==="object";(function(){var a,b=0,c={};f.p={Za:function(d){if(!a){a=doc.createDocumentFragment();a.namespaces.add("css3vml","urn:schemas-microsoft-com:vml")}return a.createElement("css3vml:"+d)},Ba:function(d){return d&&d._pieId||(d._pieId="_"+ ++b)},Eb:function(d){var e,g,j,i,h=arguments;e=1;for(g=h.length;e<g;e++){i=h[e];for(j in i)if(i.hasOwnProperty(j))d[j]=i[j]}return d},
Rb:function(d,e,g){var j=c[d],i,h;if(j)Object.prototype.toString.call(j)==="[object Array]"?j.push([e,g]):e.call(g,j);else{h=c[d]=[[e,g]];i=new Image;i.onload=function(){j=c[d]={h:i.width,f:i.height};for(var k=0,n=h.length;k<n;k++)h[k][0].call(h[k][1],j);i.onload=null};i.src=d}}}})();f.Na={gc:function(a,b,c,d){function e(){k=j>=90&&j<270?b:0;n=j<180?c:0;m=b-k;p=c-n}function g(){for(;j<0;)j+=360;j%=360}var j=d.sa;d=d.zb;var i,h,k,n,m,p,r,t;if(d){d=d.coords(a,b,c);i=d.x;h=d.y}if(j){j=j.jd();g();e();
if(!d){i=k;h=n}d=f.Na.tc(i,h,j,m,p);a=d[0];d=d[1]}else if(d){a=b-i;d=c-h}else{i=h=a=0;d=c}r=a-i;t=d-h;if(j===void 0){j=!r?t<0?90:270:!t?r<0?180:0:-Math.atan2(t,r)/Math.PI*180;g();e()}return{sa:j,xc:i,yc:h,td:a,ud:d,Wd:k,Xd:n,rd:m,sd:p,kd:r,ld:t,rc:f.Na.dc(i,h,a,d)}},tc:function(a,b,c,d,e){if(c===0||c===180)return[d,b];else if(c===90||c===270)return[a,e];else{c=Math.tan(-c*Math.PI/180);a=c*a-b;b=-1/c;d=b*d-e;e=b-c;return[(d-a)/e,(c*d-b*a)/e]}},dc:function(a,b,c,d){a=c-a;b=d-b;return Math.abs(a===0?
b:b===0?a:Math.sqrt(a*a+b*b))}};f.ea=function(){this.Gb=[];this.oc={}};f.ea.prototype={ba:function(a){var b=f.p.Ba(a),c=this.oc,d=this.Gb;if(!(b in c)){c[b]=d.length;d.push(a)}},Ha:function(a){a=f.p.Ba(a);var b=this.oc;if(a&&a in b){delete this.Gb[b[a]];delete b[a]}},xa:function(){for(var a=this.Gb,b=a.length;b--;)a[b]&&a[b]()}};f.Oa=new f.ea;f.Oa.Rd=function(){var a=this,b;if(!a.Sd){b=doc.documentElement.currentStyle.getAttribute(f.F+"poll-interval")||250;(function c(){a.xa();setTimeout(c,b)})();
a.Sd=1}};(function(){function a(){f.L.xa();window.detachEvent("onunload",a);window.PIE=null}f.L=new f.ea;window.attachEvent("onunload",a);f.L.ta=function(b,c,d){b.attachEvent(c,d);this.ba(function(){b.detachEvent(c,d)})}})();f.Qa=new f.ea;f.L.ta(window,"onresize",function(){f.Qa.xa()});(function(){function a(){f.mb.xa()}f.mb=new f.ea;f.L.ta(window,"onscroll",a);f.Qa.ba(a)})();(function(){function a(){c=f.kb.md()}function b(){if(c){for(var d=0,e=c.length;d<e;d++)f.attach(c[d]);c=0}}var c;if(f.ja<9){f.L.ta(window,
"onbeforeprint",a);f.L.ta(window,"onafterprint",b)}})();f.lb=new f.ea;f.L.ta(doc,"onmouseup",function(){f.lb.xa()});f.he=function(){function a(h){this.Y=h}var b=doc.createElement("length-calc"),c=doc.body||doc.documentElement,d=b.style,e={},g=["mm","cm","in","pt","pc"],j=g.length,i={};d.position="absolute";d.top=d.left="-9999px";for(c.appendChild(b);j--;){d.width="100"+g[j];e[g[j]]=b.offsetWidth/100}c.removeChild(b);d.width="1em";a.prototype={Kb:/(px|em|ex|mm|cm|in|pt|pc|%)$/,ic:function(){var h=
this.Jd;if(h===void 0)h=this.Jd=parseFloat(this.Y);return h},yb:function(){var h=this.ae;if(!h)h=this.ae=(h=this.Y.match(this.Kb))&&h[0]||"px";return h},a:function(h,k){var n=this.ic(),m=this.yb();switch(m){case "px":return n;case "%":return n*(typeof k==="function"?k():k)/100;case "em":return n*this.xb(h);case "ex":return n*this.xb(h)/2;default:return n*e[m]}},xb:function(h){var k=h.currentStyle.fontSize,n,m;if(k.indexOf("px")>0)return parseFloat(k);else if(h.tagName in f.cc){m=this;n=h.parentNode;
return f.n(k).a(n,function(){return m.xb(n)})}else{h.appendChild(b);k=b.offsetWidth;b.parentNode===h&&h.removeChild(b);return k}}};f.n=function(h){return i[h]||(i[h]=new a(h))};return a}();f.Ja=function(){function a(e){this.X=e}var b=f.n("50%"),c={top:1,center:1,bottom:1},d={left:1,center:1,right:1};a.prototype={zd:function(){if(!this.ac){var e=this.X,g=e.length,j=f.v,i=j.qa,h=f.n("0");i=i.na;h=["left",h,"top",h];if(g===1){e.push(new j.ob(i,"center"));g++}if(g===2){i&(e[0].k|e[1].k)&&e[0].d in c&&
e[1].d in d&&e.push(e.shift());if(e[0].k&i)if(e[0].d==="center")h[1]=b;else h[0]=e[0].d;else if(e[0].W())h[1]=f.n(e[0].d);if(e[1].k&i)if(e[1].d==="center")h[3]=b;else h[2]=e[1].d;else if(e[1].W())h[3]=f.n(e[1].d)}this.ac=h}return this.ac},coords:function(e,g,j){var i=this.zd(),h=i[1].a(e,g);e=i[3].a(e,j);return{x:i[0]==="right"?g-h:h,y:i[2]==="bottom"?j-e:e}}};return a}();f.Ka=function(){function a(b,c){this.h=b;this.f=c}a.prototype={a:function(b,c,d,e,g){var j=this.h,i=this.f,h=c/d;e=e/g;if(j===
"contain"){j=e>h?c:d*e;i=e>h?c/e:d}else if(j==="cover"){j=e<h?c:d*e;i=e<h?c/e:d}else if(j==="auto"){i=i==="auto"?g:i.a(b,d);j=i*e}else{j=j.a(b,c);i=i==="auto"?j/e:i.a(b,d)}return{h:j,f:i}}};a.Kc=new a("auto","auto");return a}();f.Ec=function(){function a(b){this.Y=b}a.prototype={Kb:/[a-z]+$/i,yb:function(){return this.ad||(this.ad=this.Y.match(this.Kb)[0].toLowerCase())},jd:function(){var b=this.Vc,c;if(b===undefined){b=this.yb();c=parseFloat(this.Y,10);b=this.Vc=b==="deg"?c:b==="rad"?c/Math.PI*180:
b==="grad"?c/400*360:b==="turn"?c*360:0}return b}};return a}();f.Jc=function(){function a(c){this.Y=c}var b={};a.Qd=/\s*rgba\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d+|\d*\.\d+)\s*\)\s*/;a.Fb={aliceblue:"F0F8FF",antiquewhite:"FAEBD7",aqua:"0FF",aquamarine:"7FFFD4",azure:"F0FFFF",beige:"F5F5DC",bisque:"FFE4C4",black:"000",blanchedalmond:"FFEBCD",blue:"00F",blueviolet:"8A2BE2",brown:"A52A2A",burlywood:"DEB887",cadetblue:"5F9EA0",chartreuse:"7FFF00",chocolate:"D2691E",coral:"FF7F50",cornflowerblue:"6495ED",
cornsilk:"FFF8DC",crimson:"DC143C",cyan:"0FF",darkblue:"00008B",darkcyan:"008B8B",darkgoldenrod:"B8860B",darkgray:"A9A9A9",darkgreen:"006400",darkkhaki:"BDB76B",darkmagenta:"8B008B",darkolivegreen:"556B2F",darkorange:"FF8C00",darkorchid:"9932CC",darkred:"8B0000",darksalmon:"E9967A",darkseagreen:"8FBC8F",darkslateblue:"483D8B",darkslategray:"2F4F4F",darkturquoise:"00CED1",darkviolet:"9400D3",deeppink:"FF1493",deepskyblue:"00BFFF",dimgray:"696969",dodgerblue:"1E90FF",firebrick:"B22222",floralwhite:"FFFAF0",
forestgreen:"228B22",fuchsia:"F0F",gainsboro:"DCDCDC",ghostwhite:"F8F8FF",gold:"FFD700",goldenrod:"DAA520",gray:"808080",green:"008000",greenyellow:"ADFF2F",honeydew:"F0FFF0",hotpink:"FF69B4",indianred:"CD5C5C",indigo:"4B0082",ivory:"FFFFF0",khaki:"F0E68C",lavender:"E6E6FA",lavenderblush:"FFF0F5",lawngreen:"7CFC00",lemonchiffon:"FFFACD",lightblue:"ADD8E6",lightcoral:"F08080",lightcyan:"E0FFFF",lightgoldenrodyellow:"FAFAD2",lightgreen:"90EE90",lightgrey:"D3D3D3",lightpink:"FFB6C1",lightsalmon:"FFA07A",
lightseagreen:"20B2AA",lightskyblue:"87CEFA",lightslategray:"789",lightsteelblue:"B0C4DE",lightyellow:"FFFFE0",lime:"0F0",limegreen:"32CD32",linen:"FAF0E6",magenta:"F0F",maroon:"800000",mediumauqamarine:"66CDAA",mediumblue:"0000CD",mediumorchid:"BA55D3",mediumpurple:"9370D8",mediumseagreen:"3CB371",mediumslateblue:"7B68EE",mediumspringgreen:"00FA9A",mediumturquoise:"48D1CC",mediumvioletred:"C71585",midnightblue:"191970",mintcream:"F5FFFA",mistyrose:"FFE4E1",moccasin:"FFE4B5",navajowhite:"FFDEAD",
navy:"000080",oldlace:"FDF5E6",olive:"808000",olivedrab:"688E23",orange:"FFA500",orangered:"FF4500",orchid:"DA70D6",palegoldenrod:"EEE8AA",palegreen:"98FB98",paleturquoise:"AFEEEE",palevioletred:"D87093",papayawhip:"FFEFD5",peachpuff:"FFDAB9",peru:"CD853F",pink:"FFC0CB",plum:"DDA0DD",powderblue:"B0E0E6",purple:"800080",red:"F00",rosybrown:"BC8F8F",royalblue:"4169E1",saddlebrown:"8B4513",salmon:"FA8072",sandybrown:"F4A460",seagreen:"2E8B57",seashell:"FFF5EE",sienna:"A0522D",silver:"C0C0C0",skyblue:"87CEEB",
slateblue:"6A5ACD",slategray:"708090",snow:"FFFAFA",springgreen:"00FF7F",steelblue:"4682B4",tan:"D2B48C",teal:"008080",thistle:"D8BFD8",tomato:"FF6347",turquoise:"40E0D0",violet:"EE82EE",wheat:"F5DEB3",white:"FFF",whitesmoke:"F5F5F5",yellow:"FF0",yellowgreen:"9ACD32"};a.prototype={parse:function(){if(!this.Ua){var c=this.Y,d;if(d=c.match(a.Qd)){this.Ua="rgb("+d[1]+","+d[2]+","+d[3]+")";this.Yb=parseFloat(d[4])}else{if((d=c.toLowerCase())in a.Fb)c="#"+a.Fb[d];this.Ua=c;this.Yb=c==="transparent"?0:
1}}},U:function(c){this.parse();return this.Ua==="currentColor"?c.currentStyle.color:this.Ua},fa:function(){this.parse();return this.Yb}};f.ha=function(c){return b[c]||(b[c]=new a(c))};return a}();f.v=function(){function a(c){this.$a=c;this.ch=0;this.X=[];this.Ga=0}var b=a.qa={Ia:1,Wb:2,z:4,Lc:8,Xb:16,na:32,K:64,oa:128,pa:256,Ra:512,Tc:1024,URL:2048};a.ob=function(c,d){this.k=c;this.d=d};a.ob.prototype={Ca:function(){return this.k&b.K||this.k&b.oa&&this.d==="0"},W:function(){return this.Ca()||this.k&
b.Ra}};a.prototype={de:/\s/,Kd:/^[\+\-]?(\d*\.)?\d+/,url:/^url\(\s*("([^"]*)"|'([^']*)'|([!#$%&*-~]*))\s*\)/i,nc:/^\-?[_a-z][\w-]*/i,Yd:/^("([^"]*)"|'([^']*)')/,Bd:/^#([\da-f]{6}|[\da-f]{3})/i,be:{px:b.K,em:b.K,ex:b.K,mm:b.K,cm:b.K,"in":b.K,pt:b.K,pc:b.K,deg:b.Ia,rad:b.Ia,grad:b.Ia},fd:{rgb:1,rgba:1,hsl:1,hsla:1},next:function(c){function d(p,r){p=new a.ob(p,r);if(!c){k.X.push(p);k.Ga++}return p}function e(){k.Ga++;return null}var g,j,i,h,k=this;if(this.Ga<this.X.length)return this.X[this.Ga++];for(;this.de.test(this.$a.charAt(this.ch));)this.ch++;
if(this.ch>=this.$a.length)return e();j=this.ch;g=this.$a.substring(this.ch);i=g.charAt(0);switch(i){case "#":if(h=g.match(this.Bd)){this.ch+=h[0].length;return d(b.z,h[0])}break;case '"':case "'":if(h=g.match(this.Yd)){this.ch+=h[0].length;return d(b.Tc,h[2]||h[3]||"")}break;case "/":case ",":this.ch++;return d(b.pa,i);case "u":if(h=g.match(this.url)){this.ch+=h[0].length;return d(b.URL,h[2]||h[3]||h[4]||"")}}if(h=g.match(this.Kd)){i=h[0];this.ch+=i.length;if(g.charAt(i.length)==="%"){this.ch++;
return d(b.Ra,i+"%")}if(h=g.substring(i.length).match(this.nc)){i+=h[0];this.ch+=h[0].length;return d(this.be[h[0].toLowerCase()]||b.Lc,i)}return d(b.oa,i)}if(h=g.match(this.nc)){i=h[0];this.ch+=i.length;if(i.toLowerCase()in f.Jc.Fb||i==="currentColor"||i==="transparent")return d(b.z,i);if(g.charAt(i.length)==="("){this.ch++;if(i.toLowerCase()in this.fd){g=function(p){return p&&p.k&b.oa};h=function(p){return p&&p.k&(b.oa|b.Ra)};var n=function(p,r){return p&&p.d===r},m=function(){return k.next(1)};
if((i.charAt(0)==="r"?h(m()):g(m()))&&n(m(),",")&&h(m())&&n(m(),",")&&h(m())&&(i==="rgb"||i==="hsa"||n(m(),",")&&g(m()))&&n(m(),")"))return d(b.z,this.$a.substring(j,this.ch));return e()}return d(b.Xb,i)}return d(b.na,i)}this.ch++;return d(b.Wb,i)},D:function(){return this.X[this.Ga-- -2]},all:function(){for(;this.next(););return this.X},ma:function(c,d){for(var e=[],g,j;g=this.next();){if(c(g)){j=true;this.D();break}e.push(g)}return d&&!j?null:e}};return a}();var ha=function(a){this.e=a};ha.prototype=
{Z:0,Od:function(){var a=this.qb,b;return!a||(b=this.o())&&(a.x!==b.x||a.y!==b.y)},Td:function(){var a=this.qb,b;return!a||(b=this.o())&&(a.h!==b.h||a.f!==b.f)},hc:function(){var a=this.e,b=a.getBoundingClientRect(),c=f.ja===9,d=f.O===7,e=b.right-b.left;return{x:b.left,y:b.top,h:c||d?a.offsetWidth:e,f:c||d?a.offsetHeight:b.bottom-b.top,Hd:d&&e?a.offsetWidth/e:1}},o:function(){return this.Z?this.Va||(this.Va=this.hc()):this.hc()},Ad:function(){return!!this.qb},cb:function(){++this.Z},hb:function(){if(!--this.Z){if(this.Va)this.qb=
this.Va;this.Va=null}}};(function(){function a(b){var c=f.p.Ba(b);return function(){if(this.Z){var d=this.$b||(this.$b={});return c in d?d[c]:(d[c]=b.call(this))}else return b.call(this)}}f.B={Z:0,ka:function(b){function c(d){this.e=d;this.Zb=this.ia()}f.p.Eb(c.prototype,f.B,b);c.$c={};return c},j:function(){var b=this.ia(),c=this.constructor.$c;return b?b in c?c[b]:(c[b]=this.la(b)):null},ia:a(function(){var b=this.e,c=this.constructor,d=b.style;b=b.currentStyle;var e=this.wa,g=this.Fa,j=c.Yc||(c.Yc=
f.F+e);c=c.Zc||(c.Zc=f.nb+g.charAt(0).toUpperCase()+g.substring(1));return d[c]||b.getAttribute(j)||d[g]||b.getAttribute(e)}),i:a(function(){return!!this.j()}),H:a(function(){var b=this.ia(),c=b!==this.Zb;this.Zb=b;return c}),va:a,cb:function(){++this.Z},hb:function(){--this.Z||delete this.$b}}})();f.Sb=f.B.ka({wa:f.F+"background",Fa:f.nb+"Background",cd:{scroll:1,fixed:1,local:1},fb:{"repeat-x":1,"repeat-y":1,repeat:1,"no-repeat":1},sc:{"padding-box":1,"border-box":1,"content-box":1},Pd:{top:1,right:1,
bottom:1,left:1,center:1},Ud:{contain:1,cover:1},eb:{Ma:"backgroundClip",z:"backgroundColor",da:"backgroundImage",Pa:"backgroundOrigin",S:"backgroundPosition",T:"backgroundRepeat",Sa:"backgroundSize"},la:function(a){function b(s){return s&&s.W()||s.k&k&&s.d in t}function c(s){return s&&(s.W()&&f.n(s.d)||s.d==="auto"&&"auto")}var d=this.e.currentStyle,e,g,j,i=f.v.qa,h=i.pa,k=i.na,n=i.z,m,p,r=0,t=this.Pd,v,l,q={M:[]};if(this.wb()){e=new f.v(a);for(j={};g=e.next();){m=g.k;p=g.d;if(!j.P&&m&i.Xb&&p===
"linear-gradient"){v={ca:[],P:p};for(l={};g=e.next();){m=g.k;p=g.d;if(m&i.Wb&&p===")"){l.color&&v.ca.push(l);v.ca.length>1&&f.p.Eb(j,v);break}if(m&n){if(v.sa||v.zb){g=e.D();if(g.k!==h)break;e.next()}l={color:f.ha(p)};g=e.next();if(g.W())l.db=f.n(g.d);else e.D()}else if(m&i.Ia&&!v.sa&&!l.color&&!v.ca.length)v.sa=new f.Ec(g.d);else if(b(g)&&!v.zb&&!l.color&&!v.ca.length){e.D();v.zb=new f.Ja(e.ma(function(s){return!b(s)},false))}else if(m&h&&p===","){if(l.color){v.ca.push(l);l={}}}else break}}else if(!j.P&&
m&i.URL){j.Ab=p;j.P="image"}else if(b(g)&&!j.$){e.D();j.$=new f.Ja(e.ma(function(s){return!b(s)},false))}else if(m&k)if(p in this.fb&&!j.bb)j.bb=p;else if(p in this.sc&&!j.Wa){j.Wa=p;if((g=e.next())&&g.k&k&&g.d in this.sc)j.ub=g.d;else{j.ub=p;e.D()}}else if(p in this.cd&&!j.bc)j.bc=p;else return null;else if(m&n&&!q.color)q.color=f.ha(p);else if(m&h&&p==="/"&&!j.Xa&&j.$){g=e.next();if(g.k&k&&g.d in this.Ud)j.Xa=new f.Ka(g.d);else if(g=c(g)){m=c(e.next());if(!m){m=g;e.D()}j.Xa=new f.Ka(g,m)}else return null}else if(m&
h&&p===","&&j.P){j.Hb=a.substring(r,e.ch-1);r=e.ch;q.M.push(j);j={}}else return null}if(j.P){j.Hb=a.substring(r);q.M.push(j)}}else this.Bc(f.ja<9?function(){var s=this.eb,o=d[s.S+"X"],u=d[s.S+"Y"],x=d[s.da],y=d[s.z];if(y!=="transparent")q.color=f.ha(y);if(x!=="none")q.M=[{P:"image",Ab:(new f.v(x)).next().d,bb:d[s.T],$:new f.Ja((new f.v(o+" "+u)).all())}]}:function(){var s=this.eb,o=/\s*,\s*/,u=d[s.da].split(o),x=d[s.z],y,z,B,E,D,C;if(x!=="transparent")q.color=f.ha(x);if((E=u.length)&&u[0]!=="none"){x=
d[s.T].split(o);y=d[s.S].split(o);z=d[s.Pa].split(o);B=d[s.Ma].split(o);s=d[s.Sa].split(o);q.M=[];for(o=0;o<E;o++)if((D=u[o])&&D!=="none"){C=s[o].split(" ");q.M.push({Hb:D+" "+x[o]+" "+y[o]+" / "+s[o]+" "+z[o]+" "+B[o],P:"image",Ab:(new f.v(D)).next().d,bb:x[o],$:new f.Ja((new f.v(y[o])).all()),Wa:z[o],ub:B[o],Xa:new f.Ka(C[0],C[1])})}}});return q.color||q.M[0]?q:null},Bc:function(a){var b=f.ja>8,c=this.eb,d=this.e.runtimeStyle,e=d[c.da],g=d[c.z],j=d[c.T],i,h,k,n;if(e)d[c.da]="";if(g)d[c.z]="";if(j)d[c.T]=
"";if(b){i=d[c.Ma];h=d[c.Pa];n=d[c.S];k=d[c.Sa];if(i)d[c.Ma]="";if(h)d[c.Pa]="";if(n)d[c.S]="";if(k)d[c.Sa]=""}a=a.call(this);if(e)d[c.da]=e;if(g)d[c.z]=g;if(j)d[c.T]=j;if(b){if(i)d[c.Ma]=i;if(h)d[c.Pa]=h;if(n)d[c.S]=n;if(k)d[c.Sa]=k}return a},ia:f.B.va(function(){return this.wb()||this.Bc(function(){var a=this.e.currentStyle,b=this.eb;return a[b.z]+" "+a[b.da]+" "+a[b.T]+" "+a[b.S+"X"]+" "+a[b.S+"Y"]})}),wb:f.B.va(function(){var a=this.e;return a.style[this.Fa]||a.currentStyle.getAttribute(this.wa)}),
qc:function(){var a=0;if(f.O<7){a=this.e;a=""+(a.style[f.nb+"PngFix"]||a.currentStyle.getAttribute(f.F+"png-fix"))==="true"}return a},i:f.B.va(function(){return(this.wb()||this.qc())&&!!this.j()})});f.Vb=f.B.ka({wc:["Top","Right","Bottom","Left"],Id:{thin:"1px",medium:"3px",thick:"5px"},la:function(){var a={},b={},c={},d=false,e=true,g=true,j=true;this.Cc(function(){for(var i=this.e.currentStyle,h=0,k,n,m,p,r,t,v;h<4;h++){m=this.wc[h];v=m.charAt(0).toLowerCase();k=b[v]=i["border"+m+"Style"];n=i["border"+
m+"Color"];m=i["border"+m+"Width"];if(h>0){if(k!==p)g=false;if(n!==r)e=false;if(m!==t)j=false}p=k;r=n;t=m;c[v]=f.ha(n);m=a[v]=f.n(b[v]==="none"?"0":this.Id[m]||m);if(m.a(this.e)>0)d=true}});return d?{J:a,Zd:b,gd:c,ee:j,hd:e,$d:g}:null},ia:f.B.va(function(){var a=this.e,b=a.currentStyle,c;a.tagName in f.Ac&&a.offsetParent.currentStyle.borderCollapse==="collapse"||this.Cc(function(){c=b.borderWidth+"|"+b.borderStyle+"|"+b.borderColor});return c}),Cc:function(a){var b=this.e.runtimeStyle,c=b.borderWidth,
d=b.borderColor;if(c)b.borderWidth="";if(d)b.borderColor="";a=a.call(this);if(c)b.borderWidth=c;if(d)b.borderColor=d;return a}});(function(){f.jb=f.B.ka({wa:"border-radius",Fa:"borderRadius",la:function(b){var c=null,d,e,g,j,i=false;if(b){e=new f.v(b);var h=function(){for(var k=[],n;(g=e.next())&&g.W();){j=f.n(g.d);n=j.ic();if(n<0)return null;if(n>0)i=true;k.push(j)}return k.length>0&&k.length<5?{tl:k[0],tr:k[1]||k[0],br:k[2]||k[0],bl:k[3]||k[1]||k[0]}:null};if(b=h()){if(g){if(g.k&f.v.qa.pa&&g.d===
"/")d=h()}else d=b;if(i&&b&&d)c={x:b,y:d}}}return c}});var a=f.n("0");a={tl:a,tr:a,br:a,bl:a};f.jb.Dc={x:a,y:a}})();f.Ub=f.B.ka({wa:"border-image",Fa:"borderImage",fb:{stretch:1,round:1,repeat:1,space:1},la:function(a){var b=null,c,d,e,g,j,i,h=0,k=f.v.qa,n=k.na,m=k.oa,p=k.Ra;if(a){c=new f.v(a);b={};for(var r=function(l){return l&&l.k&k.pa&&l.d==="/"},t=function(l){return l&&l.k&n&&l.d==="fill"},v=function(){g=c.ma(function(l){return!(l.k&(m|p))});if(t(c.next())&&!b.fill)b.fill=true;else c.D();if(r(c.next())){h++;
j=c.ma(function(l){return!l.W()&&!(l.k&n&&l.d==="auto")});if(r(c.next())){h++;i=c.ma(function(l){return!l.Ca()})}}else c.D()};a=c.next();){d=a.k;e=a.d;if(d&(m|p)&&!g){c.D();v()}else if(t(a)&&!b.fill){b.fill=true;v()}else if(d&n&&this.fb[e]&&!b.repeat){b.repeat={f:e};if(a=c.next())if(a.k&n&&this.fb[a.d])b.repeat.Ob=a.d;else c.D()}else if(d&k.URL&&!b.src)b.src=e;else return null}if(!b.src||!g||g.length<1||g.length>4||j&&j.length>4||h===1&&j.length<1||i&&i.length>4||h===2&&i.length<1)return null;if(!b.repeat)b.repeat=
{f:"stretch"};if(!b.repeat.Ob)b.repeat.Ob=b.repeat.f;a=function(l,q){return{t:q(l[0]),r:q(l[1]||l[0]),b:q(l[2]||l[0]),l:q(l[3]||l[1]||l[0])}};b.slice=a(g,function(l){return f.n(l.k&m?l.d+"px":l.d)});if(j&&j[0])b.J=a(j,function(l){return l.W()?f.n(l.d):l.d});if(i&&i[0])b.Da=a(i,function(l){return l.Ca()?f.n(l.d):l.d})}return b}});f.Ic=f.B.ka({wa:"box-shadow",Fa:"boxShadow",la:function(a){var b,c=f.n,d=f.v.qa,e;if(a){e=new f.v(a);b={Da:[],Bb:[]};for(a=function(){for(var g,j,i,h,k,n;g=e.next();){i=g.d;
j=g.k;if(j&d.pa&&i===",")break;else if(g.Ca()&&!k){e.D();k=e.ma(function(m){return!m.Ca()})}else if(j&d.z&&!h)h=i;else if(j&d.na&&i==="inset"&&!n)n=true;else return false}g=k&&k.length;if(g>1&&g<5){(n?b.Bb:b.Da).push({fe:c(k[0].d),ge:c(k[1].d),blur:c(k[2]?k[2].d:"0"),Vd:c(k[3]?k[3].d:"0"),color:f.ha(h||"currentColor")});return true}return false};a(););}return b&&(b.Bb.length||b.Da.length)?b:null}});f.Uc=f.B.ka({ia:f.B.va(function(){var a=this.e.currentStyle;return a.visibility+"|"+a.display}),la:function(){var a=
this.e,b=a.runtimeStyle;a=a.currentStyle;var c=b.visibility,d;b.visibility="";d=a.visibility;b.visibility=c;return{ce:d!=="hidden",nd:a.display!=="none"}},i:function(){return false}});f.u={R:function(a){function b(c,d,e,g){this.e=c;this.s=d;this.g=e;this.parent=g}f.p.Eb(b.prototype,f.u,a);return b},Cb:false,Q:function(){return false},Ea:f.aa,Lb:function(){this.m();this.i()&&this.V()},ib:function(){this.Cb=true},Mb:function(){this.i()?this.V():this.m()},sb:function(a,b){this.vc(a);for(var c=this.ra||
(this.ra=[]),d=a+1,e=c.length,g;d<e;d++)if(g=c[d])break;c[a]=b;this.I().insertBefore(b,g||null)},za:function(a){var b=this.ra;return b&&b[a]||null},vc:function(a){var b=this.za(a),c=this.Ta;if(b&&c){c.removeChild(b);this.ra[a]=null}},Aa:function(a,b,c,d){var e=this.rb||(this.rb={}),g=e[a];if(!g){g=e[a]=f.p.Za("shape");if(b)g.appendChild(g[b]=f.p.Za(b));if(d){c=this.za(d);if(!c){this.sb(d,doc.createElement("group"+d));c=this.za(d)}}c.appendChild(g);a=g.style;a.position="absolute";a.left=a.top=0;a.behavior=
"url(#default#VML)"}return g},vb:function(a){var b=this.rb,c=b&&b[a];if(c){c.parentNode.removeChild(c);delete b[a]}return!!c},kc:function(a){var b=this.e,c=this.s.o(),d=c.h,e=c.f,g,j,i,h,k,n;c=a.x.tl.a(b,d);g=a.y.tl.a(b,e);j=a.x.tr.a(b,d);i=a.y.tr.a(b,e);h=a.x.br.a(b,d);k=a.y.br.a(b,e);n=a.x.bl.a(b,d);a=a.y.bl.a(b,e);d=Math.min(d/(c+j),e/(i+k),d/(n+h),e/(g+a));if(d<1){c*=d;g*=d;j*=d;i*=d;h*=d;k*=d;n*=d;a*=d}return{x:{tl:c,tr:j,br:h,bl:n},y:{tl:g,tr:i,br:k,bl:a}}},ya:function(a,b,c){b=b||1;var d,e,
g=this.s.o();e=g.h*b;g=g.f*b;var j=this.g.G,i=Math.floor,h=Math.ceil,k=a?a.Jb*b:0,n=a?a.Ib*b:0,m=a?a.tb*b:0;a=a?a.Db*b:0;var p,r,t,v,l;if(c||j.i()){d=this.kc(c||j.j());c=d.x.tl*b;j=d.y.tl*b;p=d.x.tr*b;r=d.y.tr*b;t=d.x.br*b;v=d.y.br*b;l=d.x.bl*b;b=d.y.bl*b;e="m"+i(a)+","+i(j)+"qy"+i(c)+","+i(k)+"l"+h(e-p)+","+i(k)+"qx"+h(e-n)+","+i(r)+"l"+h(e-n)+","+h(g-v)+"qy"+h(e-t)+","+h(g-m)+"l"+i(l)+","+h(g-m)+"qx"+i(a)+","+h(g-b)+" x e"}else e="m"+i(a)+","+i(k)+"l"+h(e-n)+","+i(k)+"l"+h(e-n)+","+h(g-m)+"l"+i(a)+
","+h(g-m)+"xe";return e},I:function(){var a=this.parent.za(this.N),b;if(!a){a=doc.createElement(this.Ya);b=a.style;b.position="absolute";b.top=b.left=0;this.parent.sb(this.N,a)}return a},mc:function(){var a=this.e,b=a.currentStyle,c=a.runtimeStyle,d=a.tagName,e=f.O===6,g;if(e&&(d in f.cc||d==="FIELDSET")||d==="BUTTON"||d==="INPUT"&&a.type in f.Gd){c.borderWidth="";d=this.g.w.wc;for(g=d.length;g--;){e=d[g];c["padding"+e]="";c["padding"+e]=f.n(b["padding"+e]).a(a)+f.n(b["border"+e+"Width"]).a(a)+(f.O!==
8&&g%2?1:0)}c.borderWidth=0}else if(e){if(a.childNodes.length!==1||a.firstChild.tagName!=="ie6-mask"){b=doc.createElement("ie6-mask");d=b.style;d.visibility="visible";for(d.zoom=1;d=a.firstChild;)b.appendChild(d);a.appendChild(b);c.visibility="hidden"}}else c.borderColor="transparent"},ie:function(){},m:function(){this.parent.vc(this.N);delete this.rb;delete this.ra}};f.Rc=f.u.R({i:function(){var a=this.ed;for(var b in a)if(a.hasOwnProperty(b)&&a[b].i())return true;return false},Q:function(){return this.g.Pb.H()},
ib:function(){if(this.i()){var a=this.jc(),b=a,c;a=a.currentStyle;var d=a.position,e=this.I().style,g=0,j=0;j=this.s.o();var i=j.Hd;if(d==="fixed"&&f.O>6){g=j.x*i;j=j.y*i;b=d}else{do b=b.offsetParent;while(b&&b.currentStyle.position==="static");if(b){c=b.getBoundingClientRect();b=b.currentStyle;g=(j.x-c.left)*i-(parseFloat(b.borderLeftWidth)||0);j=(j.y-c.top)*i-(parseFloat(b.borderTopWidth)||0)}else{b=doc.documentElement;g=(j.x+b.scrollLeft-b.clientLeft)*i;j=(j.y+b.scrollTop-b.clientTop)*i}b="absolute"}e.position=
b;e.left=g;e.top=j;e.zIndex=d==="static"?-1:a.zIndex;this.Cb=true}},Mb:f.aa,Nb:function(){var a=this.g.Pb.j();this.I().style.display=a.ce&&a.nd?"":"none"},Lb:function(){this.i()?this.Nb():this.m()},jc:function(){var a=this.e;return a.tagName in f.Ac?a.offsetParent:a},I:function(){var a=this.Ta,b;if(!a){b=this.jc();a=this.Ta=doc.createElement("css3-container");a.style.direction="ltr";this.Nb();b.parentNode.insertBefore(a,b)}return a},ab:f.aa,m:function(){var a=this.Ta,b;if(a&&(b=a.parentNode))b.removeChild(a);
delete this.Ta;delete this.ra}});f.Fc=f.u.R({N:2,Ya:"background",Q:function(){var a=this.g;return a.C.H()||a.G.H()},i:function(){var a=this.g;return a.q.i()||a.G.i()||a.C.i()||a.ga.i()&&a.ga.j().Bb},V:function(){var a=this.s.o();if(a.h&&a.f){this.od();this.pd()}},od:function(){var a=this.g.C.j(),b=this.s.o(),c=this.e,d=a&&a.color,e,g;if(d&&d.fa()>0){this.lc();a=this.Aa("bgColor","fill",this.I(),1);e=b.h;b=b.f;a.stroked=false;a.coordsize=e*2+","+b*2;a.coordorigin="1,1";a.path=this.ya(null,2);g=a.style;
g.width=e;g.height=b;a.fill.color=d.U(c);c=d.fa();if(c<1)a.fill.opacity=c}else this.vb("bgColor")},pd:function(){var a=this.g.C.j(),b=this.s.o();a=a&&a.M;var c,d,e,g,j;if(a){this.lc();d=b.h;e=b.f;for(j=a.length;j--;){b=a[j];c=this.Aa("bgImage"+j,"fill",this.I(),2);c.stroked=false;c.fill.type="tile";c.fillcolor="none";c.coordsize=d*2+","+e*2;c.coordorigin="1,1";c.path=this.ya(0,2);g=c.style;g.width=d;g.height=e;if(b.P==="linear-gradient")this.bd(c,b);else{c.fill.src=b.Ab;this.Nd(c,j)}}}for(j=a?a.length:
0;this.vb("bgImage"+j++););},Nd:function(a,b){var c=this;f.p.Rb(a.fill.src,function(d){var e=c.e,g=c.s.o(),j=g.h;g=g.f;if(j&&g){var i=a.fill,h=c.g,k=h.w.j(),n=k&&k.J;k=n?n.t.a(e):0;var m=n?n.r.a(e):0,p=n?n.b.a(e):0;n=n?n.l.a(e):0;h=h.C.j().M[b];e=h.$?h.$.coords(e,j-d.h-n-m,g-d.f-k-p):{x:0,y:0};h=h.bb;p=m=0;var r=j+1,t=g+1,v=f.O===8?0:1;n=Math.round(e.x)+n+0.5;k=Math.round(e.y)+k+0.5;i.position=n/j+","+k/g;i.size.x=1;i.size=d.h+"px,"+d.f+"px";if(h&&h!=="repeat"){if(h==="repeat-x"||h==="no-repeat"){m=
k+1;t=k+d.f+v}if(h==="repeat-y"||h==="no-repeat"){p=n+1;r=n+d.h+v}a.style.clip="rect("+m+"px,"+r+"px,"+t+"px,"+p+"px)"}}})},bd:function(a,b){var c=this.e,d=this.s.o(),e=d.h,g=d.f;a=a.fill;d=b.ca;var j=d.length,i=Math.PI,h=f.Na,k=h.tc,n=h.dc;b=h.gc(c,e,g,b);h=b.sa;var m=b.xc,p=b.yc,r=b.Wd,t=b.Xd,v=b.rd,l=b.sd,q=b.kd,s=b.ld;b=b.rc;e=h%90?Math.atan2(q*e/g,s)/i*180:h+90;e+=180;e%=360;v=k(r,t,h,v,l);g=n(r,t,v[0],v[1]);i=[];v=k(m,p,h,r,t);n=n(m,p,v[0],v[1])/g*100;k=[];for(h=0;h<j;h++)k.push(d[h].db?d[h].db.a(c,
b):h===0?0:h===j-1?b:null);for(h=1;h<j;h++){if(k[h]===null){m=k[h-1];b=h;do p=k[++b];while(p===null);k[h]=m+(p-m)/(b-h+1)}k[h]=Math.max(k[h],k[h-1])}for(h=0;h<j;h++)i.push(n+k[h]/g*100+"% "+d[h].color.U(c));a.angle=e;a.type="gradient";a.method="sigma";a.color=d[0].color.U(c);a.color2=d[j-1].color.U(c);if(a.colors)a.colors.value=i.join(",");else a.colors=i.join(",")},lc:function(){var a=this.e.runtimeStyle;a.backgroundImage="url(about:blank)";a.backgroundColor="transparent"},m:function(){f.u.m.call(this);
var a=this.e.runtimeStyle;a.backgroundImage=a.backgroundColor=""}});f.Gc=f.u.R({N:4,Ya:"border",Q:function(){var a=this.g;return a.w.H()||a.G.H()},i:function(){var a=this.g;return a.G.i()&&!a.q.i()&&a.w.i()},V:function(){var a=this.e,b=this.g.w.j(),c=this.s.o(),d=c.h;c=c.f;var e,g,j,i,h;if(b){this.mc();b=this.wd(2);i=0;for(h=b.length;i<h;i++){j=b[i];e=this.Aa("borderPiece"+i,j.stroke?"stroke":"fill",this.I());e.coordsize=d*2+","+c*2;e.coordorigin="1,1";e.path=j.path;g=e.style;g.width=d;g.height=c;
e.filled=!!j.fill;e.stroked=!!j.stroke;if(j.stroke){e=e.stroke;e.weight=j.Qb+"px";e.color=j.color.U(a);e.dashstyle=j.stroke==="dashed"?"2 2":j.stroke==="dotted"?"1 1":"solid";e.linestyle=j.stroke==="double"&&j.Qb>2?"ThinThin":"Single"}else e.fill.color=j.fill.U(a)}for(;this.vb("borderPiece"+i++););}},wd:function(a){var b=this.e,c,d,e,g=this.g.w,j=[],i,h,k,n,m=Math.round,p,r,t;if(g.i()){c=g.j();g=c.J;r=c.Zd;t=c.gd;if(c.ee&&c.$d&&c.hd){if(t.t.fa()>0){c=g.t.a(b);k=c/2;j.push({path:this.ya({Jb:k,Ib:k,
tb:k,Db:k},a),stroke:r.t,color:t.t,Qb:c})}}else{a=a||1;c=this.s.o();d=c.h;e=c.f;c=m(g.t.a(b));k=m(g.r.a(b));n=m(g.b.a(b));b=m(g.l.a(b));var v={t:c,r:k,b:n,l:b};b=this.g.G;if(b.i())p=this.kc(b.j());i=Math.floor;h=Math.ceil;var l=function(o,u){return p?p[o][u]:0},q=function(o,u,x,y,z,B){var E=l("x",o),D=l("y",o),C=o.charAt(1)==="r";o=o.charAt(0)==="b";return E>0&&D>0?(B?"al":"ae")+(C?h(d-E):i(E))*a+","+(o?h(e-D):i(D))*a+","+(i(E)-u)*a+","+(i(D)-x)*a+","+y*65535+","+2949075*(z?1:-1):(B?"m":"l")+(C?d-
u:u)*a+","+(o?e-x:x)*a},s=function(o,u,x,y){var z=o==="t"?i(l("x","tl"))*a+","+h(u)*a:o==="r"?h(d-u)*a+","+i(l("y","tr"))*a:o==="b"?h(d-l("x","br"))*a+","+i(e-u)*a:i(u)*a+","+h(e-l("y","bl"))*a;o=o==="t"?h(d-l("x","tr"))*a+","+h(u)*a:o==="r"?h(d-u)*a+","+h(e-l("y","br"))*a:o==="b"?i(l("x","bl"))*a+","+i(e-u)*a:i(u)*a+","+i(l("y","tl"))*a;return x?(y?"m"+o:"")+"l"+z:(y?"m"+z:"")+"l"+o};b=function(o,u,x,y,z,B){var E=o==="l"||o==="r",D=v[o],C,F;if(D>0&&r[o]!=="none"&&t[o].fa()>0){C=v[E?o:u];u=v[E?u:
o];F=v[E?o:x];x=v[E?x:o];if(r[o]==="dashed"||r[o]==="dotted"){j.push({path:q(y,C,u,B+45,0,1)+q(y,0,0,B,1,0),fill:t[o]});j.push({path:s(o,D/2,0,1),stroke:r[o],Qb:D,color:t[o]});j.push({path:q(z,F,x,B,0,1)+q(z,0,0,B-45,1,0),fill:t[o]})}else j.push({path:q(y,C,u,B+45,0,1)+s(o,D,0,0)+q(z,F,x,B,0,0)+(r[o]==="double"&&D>2?q(z,F-i(F/3),x-i(x/3),B-45,1,0)+s(o,h(D/3*2),1,0)+q(y,C-i(C/3),u-i(u/3),B,1,0)+"x "+q(y,i(C/3),i(u/3),B+45,0,1)+s(o,i(D/3),1,0)+q(z,i(F/3),i(x/3),B,0,0):"")+q(z,0,0,B-45,1,0)+s(o,0,1,
0)+q(y,0,0,B,1,0),fill:t[o]})}};b("t","l","r","tl","tr",90);b("r","t","b","tr","br",0);b("b","r","l","br","bl",-90);b("l","b","t","bl","tl",-180)}}return j},m:function(){if(this.ec||!this.g.q.i())this.e.runtimeStyle.borderColor="";f.u.m.call(this)}});f.Tb=f.u.R({N:5,Md:["t","tr","r","br","b","bl","l","tl","c"],Q:function(){return this.g.q.H()},i:function(){return this.g.q.i()},V:function(){this.I();var a=this.g.q.j(),b=this.g.w.j(),c=this.s.o(),d=this.e,e=this.uc;f.p.Rb(a.src,function(g){function j(s,
o,u,x,y){s=e[s].style;var z=Math.max;s.width=z(o,0);s.height=z(u,0);s.left=x;s.top=y}function i(s,o,u){for(var x=0,y=s.length;x<y;x++)e[s[x]].imagedata[o]=u}var h=c.h,k=c.f,n=f.n("0"),m=a.J||(b?b.J:{t:n,r:n,b:n,l:n});n=m.t.a(d);var p=m.r.a(d),r=m.b.a(d);m=m.l.a(d);var t=a.slice,v=t.t.a(d),l=t.r.a(d),q=t.b.a(d);t=t.l.a(d);j("tl",m,n,0,0);j("t",h-m-p,n,m,0);j("tr",p,n,h-p,0);j("r",p,k-n-r,h-p,n);j("br",p,r,h-p,k-r);j("b",h-m-p,r,m,k-r);j("bl",m,r,0,k-r);j("l",m,k-n-r,0,n);j("c",h-m-p,k-n-r,m,n);i(["tl",
"t","tr"],"cropBottom",(g.f-v)/g.f);i(["tl","l","bl"],"cropRight",(g.h-t)/g.h);i(["bl","b","br"],"cropTop",(g.f-q)/g.f);i(["tr","r","br"],"cropLeft",(g.h-l)/g.h);i(["l","r","c"],"cropTop",v/g.f);i(["l","r","c"],"cropBottom",q/g.f);i(["t","b","c"],"cropLeft",t/g.h);i(["t","b","c"],"cropRight",l/g.h);e.c.style.display=a.fill?"":"none"},this)},I:function(){var a=this.parent.za(this.N),b,c,d,e=this.Md,g=e.length;if(!a){a=doc.createElement("border-image");b=a.style;b.position="absolute";this.uc={};for(d=
0;d<g;d++){c=this.uc[e[d]]=f.p.Za("rect");c.appendChild(f.p.Za("imagedata"));b=c.style;b.behavior="url(#default#VML)";b.position="absolute";b.top=b.left=0;c.imagedata.src=this.g.q.j().src;c.stroked=false;c.filled=false;a.appendChild(c)}this.parent.sb(this.N,a)}return a},Ea:function(){if(this.i()){var a=this.e,b=a.runtimeStyle,c=this.g.q.j().J;b.borderStyle="solid";if(c){b.borderTopWidth=c.t.a(a)+"px";b.borderRightWidth=c.r.a(a)+"px";b.borderBottomWidth=c.b.a(a)+"px";b.borderLeftWidth=c.l.a(a)+"px"}this.mc()}},
m:function(){var a=this.e.runtimeStyle;a.borderStyle="";if(this.ec||!this.g.w.i())a.borderColor=a.borderWidth="";f.u.m.call(this)}});f.Hc=f.u.R({N:1,Ya:"outset-box-shadow",Q:function(){var a=this.g;return a.ga.H()||a.G.H()},i:function(){var a=this.g.ga;return a.i()&&a.j().Da[0]},V:function(){function a(C,F,O,H,M,P,I){C=b.Aa("shadow"+C+F,"fill",d,j-C);F=C.fill;C.coordsize=n*2+","+m*2;C.coordorigin="1,1";C.stroked=false;C.filled=true;F.color=M.U(c);if(P){F.type="gradienttitle";F.color2=F.color;F.opacity=
0}C.path=I;l=C.style;l.left=O;l.top=H;l.width=n;l.height=m;return C}var b=this,c=this.e,d=this.I(),e=this.g,g=e.ga.j().Da;e=e.G.j();var j=g.length,i=j,h,k=this.s.o(),n=k.h,m=k.f;k=f.O===8?1:0;for(var p=["tl","tr","br","bl"],r,t,v,l,q,s,o,u,x,y,z,B,E,D;i--;){t=g[i];q=t.fe.a(c);s=t.ge.a(c);h=t.Vd.a(c);o=t.blur.a(c);t=t.color;u=-h-o;if(!e&&o)e=f.jb.Dc;u=this.ya({Jb:u,Ib:u,tb:u,Db:u},2,e);if(o){x=(h+o)*2+n;y=(h+o)*2+m;z=x?o*2/x:0;B=y?o*2/y:0;if(o-h>n/2||o-h>m/2)for(h=4;h--;){r=p[h];E=r.charAt(0)==="b";
D=r.charAt(1)==="r";r=a(i,r,q,s,t,o,u);v=r.fill;v.focusposition=(D?1-z:z)+","+(E?1-B:B);v.focussize="0,0";r.style.clip="rect("+((E?y/2:0)+k)+"px,"+(D?x:x/2)+"px,"+(E?y:y/2)+"px,"+((D?x/2:0)+k)+"px)"}else{r=a(i,"",q,s,t,o,u);v=r.fill;v.focusposition=z+","+B;v.focussize=1-z*2+","+(1-B*2)}}else{r=a(i,"",q,s,t,o,u);q=t.fa();if(q<1)r.fill.opacity=q}}}});f.Pc=f.u.R({N:6,Ya:"imgEl",Q:function(){var a=this.g;return this.e.src!==this.Xc||a.G.H()},i:function(){var a=this.g;return a.G.i()||a.C.qc()},V:function(){this.Xc=
j;this.Cd();var a=this.Aa("img","fill",this.I()),b=a.fill,c=this.s.o(),d=c.h;c=c.f;var e=this.g.w.j(),g=e&&e.J;e=this.e;var j=e.src,i=Math.round,h=e.currentStyle,k=f.n;if(!g||f.O<7){g=f.n("0");g={t:g,r:g,b:g,l:g}}a.stroked=false;b.type="frame";b.src=j;b.position=(d?0.5/d:0)+","+(c?0.5/c:0);a.coordsize=d*2+","+c*2;a.coordorigin="1,1";a.path=this.ya({Jb:i(g.t.a(e)+k(h.paddingTop).a(e)),Ib:i(g.r.a(e)+k(h.paddingRight).a(e)),tb:i(g.b.a(e)+k(h.paddingBottom).a(e)),Db:i(g.l.a(e)+k(h.paddingLeft).a(e))},
2);a=a.style;a.width=d;a.height=c},Cd:function(){this.e.runtimeStyle.filter="alpha(opacity=0)"},m:function(){f.u.m.call(this);this.e.runtimeStyle.filter=""}});f.Oc=f.u.R({ib:f.aa,Mb:f.aa,Nb:f.aa,Lb:f.aa,Ld:/^,+|,+$/g,Fd:/,+/g,gb:function(a,b){(this.pb||(this.pb=[]))[a]=b||void 0},ab:function(){var a=this.pb,b;if(a&&(b=a.join(",").replace(this.Ld,"").replace(this.Fd,","))!==this.Wc)this.Wc=this.e.runtimeStyle.background=b},m:function(){this.e.runtimeStyle.background="";delete this.pb}});f.Mc=f.u.R({ua:1,
Q:function(){return this.g.C.H()},i:function(){var a=this.g;return a.C.i()||a.q.i()},V:function(){var a=this.g.C.j(),b,c,d=0,e,g;if(a){b=[];if(c=a.M)for(;e=c[d++];)if(e.P==="linear-gradient"){g=this.vd(e.Wa);g=(e.Xa||f.Ka.Kc).a(this.e,g.h,g.f,g.h,g.f);b.push("url(data:image/svg+xml,"+escape(this.xd(e,g.h,g.f))+") "+this.dd(e.$)+" / "+g.h+"px "+g.f+"px "+(e.bc||"")+" "+(e.Wa||"")+" "+(e.ub||""))}else b.push(e.Hb);a.color&&b.push(a.color.Y);this.parent.gb(this.ua,b.join(","))}},dd:function(a){return a?
a.X.map(function(b){return b.d}).join(" "):"0 0"},vd:function(a){var b=this.e,c=this.s.o(),d=c.h;c=c.f;var e;if(a!=="border-box")if((e=this.g.w.j())&&(e=e.J)){d-=e.l.a(b)+e.l.a(b);c-=e.t.a(b)+e.b.a(b)}if(a==="content-box"){a=f.n;e=b.currentStyle;d-=a(e.paddingLeft).a(b)+a(e.paddingRight).a(b);c-=a(e.paddingTop).a(b)+a(e.paddingBottom).a(b)}return{h:d,f:c}},xd:function(a,b,c){var d=this.e,e=a.ca,g=e.length,j=f.Na.gc(d,b,c,a);a=j.xc;var i=j.yc,h=j.td,k=j.ud;j=j.rc;var n,m,p,r,t;n=[];for(m=0;m<g;m++)n.push(e[m].db?
e[m].db.a(d,j):m===0?0:m===g-1?j:null);for(m=1;m<g;m++)if(n[m]===null){r=n[m-1];p=m;do t=n[++p];while(t===null);n[m]=r+(t-r)/(p-m+1)}b=['<svg width="'+b+'" height="'+c+'" xmlns="http://www.w3.org/2000/svg"><defs><linearGradient id="g" gradientUnits="userSpaceOnUse" x1="'+a/b*100+'%" y1="'+i/c*100+'%" x2="'+h/b*100+'%" y2="'+k/c*100+'%">'];for(m=0;m<g;m++)b.push('<stop offset="'+n[m]/j+'" stop-color="'+e[m].color.U(d)+'" stop-opacity="'+e[m].color.fa()+'"/>');b.push('</linearGradient></defs><rect width="100%" height="100%" fill="url(#g)"/></svg>');
return b.join("")},m:function(){this.parent.gb(this.ua)}});f.Nc=f.u.R({T:"repeat",Sc:"stretch",Qc:"round",ua:0,Q:function(){return this.g.q.H()},i:function(){return this.g.q.i()},V:function(){var a=this,b=a.g.q.j(),c=a.g.w.j(),d=a.s.o(),e=b.repeat,g=e.f,j=e.Ob,i=a.e,h=0;f.p.Rb(b.src,function(k){function n(Q,R,U,V,W,Y,X,S,w,A){K.push('<pattern patternUnits="userSpaceOnUse" id="pattern'+G+'" x="'+(g===l?Q+U/2-w/2:Q)+'" y="'+(j===l?R+V/2-A/2:R)+'" width="'+w+'" height="'+A+'"><svg width="'+w+'" height="'+
A+'" viewBox="'+W+" "+Y+" "+X+" "+S+'" preserveAspectRatio="none"><image xlink:href="'+v+'" x="0" y="0" width="'+r+'" height="'+t+'" /></svg></pattern>');J.push('<rect x="'+Q+'" y="'+R+'" width="'+U+'" height="'+V+'" fill="url(#pattern'+G+')" />');G++}var m=d.h,p=d.f,r=k.h,t=k.f,v=a.Dd(b.src,r,t),l=a.T,q=a.Sc;k=a.Qc;var s=Math.ceil,o=f.n("0"),u=b.J||(c?c.J:{t:o,r:o,b:o,l:o});o=u.t.a(i);var x=u.r.a(i),y=u.b.a(i);u=u.l.a(i);var z=b.slice,B=z.t.a(i),E=z.r.a(i),D=z.b.a(i);z=z.l.a(i);var C=m-u-x,F=p-o-
y,O=r-z-E,H=t-B-D,M=g===q?C:O*o/B,P=j===q?F:H*x/E,I=g===q?C:O*y/D;q=j===q?F:H*u/z;var K=[],J=[],G=0;if(g===k){M-=(M-(C%M||M))/s(C/M);I-=(I-(C%I||I))/s(C/I)}if(j===k){P-=(P-(F%P||P))/s(F/P);q-=(q-(F%q||q))/s(F/q)}k=['<svg width="'+m+'" height="'+p+'" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">'];n(0,0,u,o,0,0,z,B,u,o);n(u,0,C,o,z,0,O,B,M,o);n(m-x,0,x,o,r-E,0,E,B,x,o);n(0,o,u,F,0,B,z,H,u,q);if(b.fill)n(u,o,C,F,z,B,O,H,M||I||O,q||P||H);n(m-x,o,x,F,r-E,B,E,H,x,P);n(0,
p-y,u,y,0,t-D,z,D,u,y);n(u,p-y,C,y,z,t-D,O,D,I,y);n(m-x,p-y,x,y,r-E,t-D,E,D,x,y);k.push("<defs>"+K.join("\n")+"</defs>"+J.join("\n")+"</svg>");a.parent.gb(a.ua,"url(data:image/svg+xml,"+escape(k.join(""))+") no-repeat border-box border-box");h&&a.parent.ab()},a);h=1},Dd:function(){var a={};return function(b,c,d){var e=a[b],g;if(!e){e=new Image;g=doc.createElement("canvas");e.src=b;g.width=c;g.height=d;g.getContext("2d").drawImage(e,0,0);e=a[b]=g.toDataURL()}return e}}(),Ea:f.Tb.prototype.Ea,m:function(){var a=
this.e.runtimeStyle;this.parent.gb(this.ua);a.borderColor=a.borderStyle=a.borderWidth=""}});f.kb=function(){function a(l,q){l.className+=" "+q}function b(l){var q=v.slice.call(arguments,1),s=q.length;setTimeout(function(){if(l)for(;s--;)a(l,q[s])},0)}function c(l){var q=v.slice.call(arguments,1),s=q.length;setTimeout(function(){if(l)for(;s--;){var o=q[s];o=t[o]||(t[o]=new RegExp("\\b"+o+"\\b","g"));l.className=l.className.replace(o,"")}},0)}function d(l){function q(){if(!U){var w,A,L=f.ja,T=l.currentStyle,
N=T.getAttribute(g)==="true",da=T.getAttribute(i)!=="false",ea=T.getAttribute(h)!=="false";S=T.getAttribute(j);S=L>7?S!=="false":S==="true";if(!R){R=1;l.runtimeStyle.zoom=1;T=l;for(var fa=1;T=T.previousSibling;)if(T.nodeType===1){fa=0;break}fa&&a(l,p)}J.cb();if(N&&(A=J.o())&&(w=doc.documentElement||doc.body)&&(A.y>w.clientHeight||A.x>w.clientWidth||A.y+A.f<0||A.x+A.h<0)){if(!Y){Y=1;f.mb.ba(q)}}else{U=1;Y=R=0;f.mb.Ha(q);if(L===9){G={C:new f.Sb(l),q:new f.Ub(l),w:new f.Vb(l)};Q=[G.C,G.q];K=new f.Oc(l,
J,G);w=[new f.Mc(l,J,G,K),new f.Nc(l,J,G,K)]}else{G={C:new f.Sb(l),w:new f.Vb(l),q:new f.Ub(l),G:new f.jb(l),ga:new f.Ic(l),Pb:new f.Uc(l)};Q=[G.C,G.w,G.q,G.G,G.ga,G.Pb];K=new f.Rc(l,J,G);w=[new f.Hc(l,J,G,K),new f.Fc(l,J,G,K),new f.Gc(l,J,G,K),new f.Tb(l,J,G,K)];l.tagName==="IMG"&&w.push(new f.Pc(l,J,G,K));K.ed=w}I=[K].concat(w);if(w=l.currentStyle.getAttribute(f.F+"watch-ancestors")){w=parseInt(w,10);A=0;for(N=l.parentNode;N&&(w==="NaN"||A++<w);){H(N,"onpropertychange",C);H(N,"onmouseenter",x);
H(N,"onmouseleave",y);H(N,"onmousedown",z);if(N.tagName in f.fc){H(N,"onfocus",E);H(N,"onblur",D)}N=N.parentNode}}if(S){f.Oa.ba(o);f.Oa.Rd()}o(1)}if(!V){V=1;L<9&&H(l,"onmove",s);H(l,"onresize",s);H(l,"onpropertychange",u);ea&&H(l,"onmouseenter",x);if(ea||da)H(l,"onmouseleave",y);da&&H(l,"onmousedown",z);if(l.tagName in f.fc){H(l,"onfocus",E);H(l,"onblur",D)}f.Qa.ba(s);f.L.ba(M)}J.hb()}}function s(){J&&J.Ad()&&o()}function o(w){if(!X)if(U){var A,L=I.length;F();for(A=0;A<L;A++)I[A].Ea();if(w||J.Od())for(A=
0;A<L;A++)I[A].ib();if(w||J.Td())for(A=0;A<L;A++)I[A].Mb();K.ab();O()}else R||q()}function u(){var w,A=I.length,L;w=event;if(!X&&!(w&&w.propertyName in r))if(U){F();for(w=0;w<A;w++)I[w].Ea();for(w=0;w<A;w++){L=I[w];L.Cb||L.ib();L.Q()&&L.Lb()}K.ab();O()}else R||q()}function x(){b(l,k)}function y(){c(l,k,n)}function z(){b(l,n);f.lb.ba(B)}function B(){c(l,n);f.lb.Ha(B)}function E(){b(l,m)}function D(){c(l,m)}function C(){var w=event.propertyName;if(w==="className"||w==="id")u()}function F(){J.cb();for(var w=
Q.length;w--;)Q[w].cb()}function O(){for(var w=Q.length;w--;)Q[w].hb();J.hb()}function H(w,A,L){w.attachEvent(A,L);W.push([w,A,L])}function M(){if(V){for(var w=W.length,A;w--;){A=W[w];A[0].detachEvent(A[1],A[2])}f.L.Ha(M);V=0;W=[]}}function P(){if(!X){var w,A;M();X=1;if(I){w=0;for(A=I.length;w<A;w++){I[w].ec=1;I[w].m()}}S&&f.Oa.Ha(o);f.Qa.Ha(o);I=J=G=Q=l=null}}var I,K,J=new ha(l),G,Q,R,U,V,W=[],Y,X,S;this.Ed=q;this.update=o;this.m=P;this.qd=l}var e={},g=f.F+"lazy-init",j=f.F+"poll",i=f.F+"track-active",
h=f.F+"track-hover",k=f.La+"hover",n=f.La+"active",m=f.La+"focus",p=f.La+"first-child",r={background:1,bgColor:1,display:1},t={},v=[];d.yd=function(l){var q=f.p.Ba(l);return e[q]||(e[q]=new d(l))};d.m=function(l){l=f.p.Ba(l);var q=e[l];if(q){q.m();delete e[l]}};d.md=function(){var l=[],q;if(e){for(var s in e)if(e.hasOwnProperty(s)){q=e[s];l.push(q.qd);q.m()}e={}}return l};return d}();f.supportsVML=f.zc;f.attach=function(a){f.ja<10&&f.zc&&f.kb.yd(a).Ed()};f.detach=function(a){f.kb.m(a)}};
// Element this behavior script operates on. `element` is not defined in this
// script; presumably it is supplied by the HTC host -- confirm.
var $ = element;

/**
 * Attach PIE to the bound element.
 * Does nothing when the document media is "print" or when window.PIE is absent.
 * (`doc` is defined outside this block.)
 */
function init() {
    if (doc.media === "print") {
        return;
    }
    var pie = window.PIE;
    if (pie) {
        pie.attach($);
    }
}

/**
 * Detach PIE from the bound element and drop the reference to it.
 * Does nothing when the document media is "print" or when window.PIE is absent.
 */
function cleanup() {
    if (doc.media === "print") {
        return;
    }
    var pie = window.PIE;
    if (pie) {
        pie.detach($);
        // Release the element reference once PIE has let go of it.
        $ = 0;
    }
}

// If the element has already finished loading, initialise right away.
if ($.readyState === "complete") {
    init();
}
</script>
</PUBLIC:COMPONENT>

View File

@ -1,44 +0,0 @@
---
layout: default
sectionid: blog
---
<div class="row-fluid">
<div class="span4 recent">
<h3>Recent posts</h3>
<ul class="unstyled">
{% for post in site.posts limit: 5 %}
<li{% if page.title == post.title %} class="active"{% endif %}><a href="{{ post.url }}">{{ post.title }}</a></li>
{% endfor %}
</ul>
</div>
<div class="span8 simple-page">
<div class="text-item blog inner">
<h2 class="date">
<span>{{ page.title }}</span>
<span>{{ page.date | date: "%B %e, %Y" }} · {{ page.author | upcase }}</span>
</h2>
{% if page.image %}<img src="{{ page.image }}" alt="{{ page.title }}" class="text-img" />{% endif %}
{{ content }}
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'druidio'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function () {
    // Build a script tag that loads the Disqus embed for the configured
    // shortname (disqus_shortname is declared just above this function).
    var embedScript = document.createElement('script');
    embedScript.type = 'text/javascript';
    embedScript.async = true;
    embedScript.src = '//' + disqus_shortname + '.disqus.com/embed.js';

    // Prefer <head> as the insertion point; fall back to <body> when the
    // document has no <head> element.
    var host = document.getElementsByTagName('head')[0];
    if (!host) {
        host = document.getElementsByTagName('body')[0];
    }
    host.appendChild(embedScript);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
</div>
</div>
</div>

View File

@ -1,60 +0,0 @@
/*
 * Syntax-highlighting palette. Every rule is scoped under .highlight; the
 * trailing comment on each token rule names the token type that the short
 * class name stands for.
 */
.highlight { background: #ffffff; }
.highlight .c { color: #999988; font-style: italic } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { font-weight: bold } /* Keyword */
.highlight .o { font-weight: bold } /* Operator */
.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */
.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */
.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #999999 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #aaaaaa } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { font-weight: bold } /* Keyword.Constant */
.highlight .kd { font-weight: bold } /* Keyword.Declaration */
.highlight .kp { font-weight: bold } /* Keyword.Pseudo */
.highlight .kr { font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #009999 } /* Literal.Number */
.highlight .s { color: #d14 } /* Literal.String */
.highlight .na { color: #008080 } /* Name.Attribute */
.highlight .nb { color: #0086B3 } /* Name.Builtin */
.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */
.highlight .no { color: #008080 } /* Name.Constant */
.highlight .ni { color: #800080 } /* Name.Entity */
.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */
.highlight .nn { color: #555555 } /* Name.Namespace */
.highlight .nt { color: #000080 } /* Name.Tag */
.highlight .nv { color: #008080 } /* Name.Variable */
.highlight .ow { font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mf { color: #009999 } /* Literal.Number.Float */
.highlight .mh { color: #009999 } /* Literal.Number.Hex */
.highlight .mi { color: #009999 } /* Literal.Number.Integer */
.highlight .mo { color: #009999 } /* Literal.Number.Oct */
.highlight .sb { color: #d14 } /* Literal.String.Backtick */
.highlight .sc { color: #d14 } /* Literal.String.Char */
.highlight .sd { color: #d14 } /* Literal.String.Doc */
.highlight .s2 { color: #d14 } /* Literal.String.Double */
.highlight .se { color: #d14 } /* Literal.String.Escape */
.highlight .sh { color: #d14 } /* Literal.String.Heredoc */
.highlight .si { color: #d14 } /* Literal.String.Interpol */
.highlight .sx { color: #d14 } /* Literal.String.Other */
.highlight .sr { color: #009926 } /* Literal.String.Regex */
.highlight .s1 { color: #d14 } /* Literal.String.Single */
.highlight .ss { color: #990073 } /* Literal.String.Symbol */
.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */
.highlight .vc { color: #008080 } /* Name.Variable.Class */
.highlight .vg { color: #008080 } /* Name.Variable.Global */
.highlight .vi { color: #008080 } /* Name.Variable.Instance */
.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */

6
docs/content/css/toc.css Normal file
View File

@ -0,0 +1,6 @@
/* Lists inside the table of contents: no bullet markers, with a fixed
   15px left padding applied to every list level. */
.toc ul {
list-style: none;
list-style-position: inside;
padding-left: 15px;
}

65
docs/content/toc.textile Normal file
View File

@ -0,0 +1,65 @@
---
---
<link rel="stylesheet" href="css/toc.css">
h1. Contents
* "Introduction":./Home.html
* "Download":./Download.html
* "Support":./Support.html
* "Contribute":./Contribute.html
h2. Getting Started
* "Tutorial: A First Look at Druid":./Tutorial:-A-First-Look-at-Druid.html
* "Tutorial: The Druid Cluster":./Tutorial:-The-Druid-Cluster.html
* "Loading Your Data":./Loading-Your-Data.html
* "Querying Your Data":./Querying-your-data.html
* "Booting a Production Cluster":./Booting-a-production-cluster.html
* "Examples":./Examples.html
* "Configuration":./Configuration.html
h2. Data Ingestion
* "Realtime":./Realtime.html
* "Batch":./Batch-ingestion.html
* "Indexing Service":./Indexing-Service.html
h2. Querying
* "Querying":./Querying.html
** "Filters":./Filters.html
** "Aggregations":./Aggregations.html
** "Post Aggregations":./Post-aggregations.html
** "Granularities":./Granularities.html
* Query Types
** "GroupByQuery":./GroupByQuery.html
*** "OrderBy":./OrderBy.html
*** "Having":./Having.html
** "SearchQuery":./SearchQuery.html
*** "SearchQuerySpec":./SearchQuerySpec.html
** "SegmentMetadataQuery":./SegmentMetadataQuery.html
** "TimeBoundaryQuery":./TimeBoundaryQuery.html
** "TimeseriesQuery":./TimeseriesQuery.html
h2. Architecture
* "Design":./Design.html
* "Segments":./Segments.html
* Node Types
** "Compute":./Compute.html
** "Broker":./Broker.html
** "Master":./Master.html
*** "Rule Configuration":./Rule-Configuration.html
** "Realtime":./Realtime.html
*** "Firehose":./Firehose.html
*** "Plumber":./Plumber.html
* External Dependencies
** "Deep Storage":./Deep-Storage.html
** "MySQL":./MySQL.html
** "ZooKeeper":./ZooKeeper.html
* "Concepts and Terminology":./Concepts-and-Terminology.html
h2. Development
* "Versioning":./Versioning.html
* "Build From Source":./Build-from-source.html
* "Libraries":./Libraries.html
h2. Misc
* "Thanks":./Thanks.html

14
docs/css/blogs.css Normal file
View File

@ -0,0 +1,14 @@
/* Vertical spacing below the blog listing block. */
.blog-listing {
margin-bottom: 70px;
}
/* Same 70px gap below each individual blog entry. */
.blog-entry {
margin-bottom: 70px;
}
/* Items of the list nested under .recents: regular weight, spaced 15px apart. */
.recents ul li {
font-weight: 400;
margin-bottom: 15px;
}

21
docs/css/index.css Normal file
View File

@ -0,0 +1,21 @@
/* Sub-text block: 20px gap above, 50px gap below. */
.sub-text {
margin-top: 20px;
margin-bottom: 50px;
}
/* Main marketing section: 50px gap below. */
.main-marketing {
margin-bottom: 50px;
}
/* Links inside the marketing section are rendered in black. */
.main-marketing a {
color: #000000;
}
/* NOTE: unscoped selector -- this restyles every h2 on any page that loads
   this stylesheet, not only headings inside .main-marketing. */
h2 {
font-weight: 400;
font-size: 30px;
}
/* Images inside the marketing section: 40px gap below. */
.main-marketing img {
margin-bottom: 40px;
}

View File

@ -1,13 +1,7 @@
---
layout: default
title: Your New Jekyll Site
---
<div id="home">
<h1>Blog Posts</h1>
<ul class="posts">
{% for post in site.posts %}
<li><span>{{ post.date | date_to_string }}</span> &raquo; <a href="{{ post.url }}">{{ post.title }}</a></li>
{% endfor %}
</ul>
</div>
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="refresh" content="0;url=/content/Home.html" />
</head>
</html>

View File

@ -200,12 +200,12 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer
command.add("io.druid.cli.Main");
command.add("internal");
command.add("peon");
command.add(taskFile.toString());
command.add(statusFile.toString());
// Optional node type for the forked peon. The flag and its value are added
// as two separate list entries: formatting them into one string
// ("--nodeType foo") would hand the child a single argument containing a
// space, which is not the same as the flag followed by its value.
// NOTE(review): this assumes each entry of `command` becomes exactly one
// argument of the launched process -- confirm where the process is started.
String nodeType = task.getNodeType();
if (nodeType != null) {
  command.add("--nodeType");
  command.add(nodeType);
}
command.add(taskFile.toString());
command.add(statusFile.toString());
jsonMapper.writeValue(taskFile, task);

View File

@ -25,5 +25,6 @@ public interface Server
{
public String getScheme();
public String getHost();
public String getAddress();
public int getPort();
}

View File

@ -62,7 +62,7 @@ public class CuratorServiceAnnouncer implements ServiceAnnouncer
try {
instance = ServiceInstance.<Void>builder()
.name(serviceName)
.address(service.getHost())
.address(service.getHostNoPort())
.port(service.getPort())
.build();
}

View File

@ -147,9 +147,9 @@ public class DiscoveryModule implements Module
final Injector injector,
final Set<KeyHolder<DruidNode>> nodesToAnnounce,
final Lifecycle lifecycle
)
) throws Exception
{
lifecycle.addHandler(
lifecycle.addMaybeStartHandler(
new Lifecycle.Handler()
{
private volatile List<DruidNode> nodes = null;
@ -203,7 +203,7 @@ public class DiscoveryModule implements Module
.client(curator)
.build();
lifecycle.addHandler(
lifecycle.addMaybeStartHandler(
new Lifecycle.Handler()
{
@Override

View File

@ -63,6 +63,12 @@ public class ServerDiscoverySelector implements DiscoverySelector<Server>
{
/**
 * Returns this server's location as {@code address:port}, combining
 * {@link #getAddress()} and {@link #getPort()}.
 */
@Override
public String getHost()
{
  final String address = getAddress();
  final int port = getPort();
  return String.format("%s:%d", address, port);
}
/**
 * Returns the address reported by the wrapped service instance.
 */
@Override
public String getAddress()
{
  final String address = instance.getAddress();
  return address;
}

View File

@ -19,6 +19,8 @@
package io.druid.cli;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import com.google.inject.Injector;
@ -28,6 +30,10 @@ import com.google.inject.TypeLiteral;
import com.google.inject.multibindings.MapBinder;
import com.google.inject.servlet.GuiceFilter;
import com.metamx.common.logger.Logger;
import druid.examples.flights.FlightsFirehoseFactory;
import druid.examples.rand.RandomFirehoseFactory;
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
import druid.examples.web.WebFirehoseFactory;
import io.airlift.command.Command;
import io.druid.guice.IndexingServiceModuleHelper;
import io.druid.guice.JacksonConfigProvider;
@ -41,6 +47,8 @@ import io.druid.guice.PolyBind;
import io.druid.indexing.common.actions.LocalTaskActionClientFactory;
import io.druid.indexing.common.actions.TaskActionClientFactory;
import io.druid.indexing.common.actions.TaskActionToolbox;
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
import io.druid.indexing.common.tasklogs.SwitchingTaskLogStreamer;
import io.druid.indexing.common.tasklogs.TaskLogStreamer;
import io.druid.indexing.common.tasklogs.TaskLogs;
@ -69,6 +77,12 @@ import io.druid.indexing.coordinator.scaling.ResourceManagementStrategy;
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementConfig;
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementStrategy;
import io.druid.indexing.coordinator.setup.WorkerSetupData;
import io.druid.initialization.DruidModule;
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import io.druid.server.http.RedirectFilter;
import io.druid.server.http.RedirectInfo;
import io.druid.server.initialization.JettyServerInitializer;
@ -84,6 +98,7 @@ import org.eclipse.jetty.servlet.ServletHolder;
import org.eclipse.jetty.servlets.GzipFilter;
import org.eclipse.jetty.util.resource.ResourceCollection;
import java.util.Arrays;
import java.util.List;
/**
@ -105,7 +120,7 @@ public class CliOverlord extends ServerRunnable
protected List<Object> getModules()
{
return ImmutableList.<Object>of(
new Module()
new DruidModule()
{
@Override
public void configure(Binder binder)
@ -199,6 +214,27 @@ public class CliOverlord extends ServerRunnable
JsonConfigProvider.bind(binder, "druid.indexer.autoscale", SimpleResourceManagementConfig.class);
}
/**
 * Supplies the Jackson modules contributed by this Druid module.
 *
 * @return a single-element list holding a {@code SimpleModule} named "RealtimeModule" on which
 *         each firehose factory class below is registered as a named subtype
 */
@Override
public List<? extends com.fasterxml.jackson.databind.Module> getJacksonModules()
{
  final SimpleModule firehoseModule = new SimpleModule("RealtimeModule");

  // Each NamedType pairs a firehose factory class with the type name it is registered under.
  firehoseModule.registerSubtypes(
      new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
      new NamedType(FlightsFirehoseFactory.class, "flights"),
      new NamedType(RandomFirehoseFactory.class, "rand"),
      new NamedType(WebFirehoseFactory.class, "webstream"),
      new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
      new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
      new NamedType(ClippedFirehoseFactory.class, "clipped"),
      new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
      new NamedType(IrcFirehoseFactory.class, "irc"),
      new NamedType(StaticS3FirehoseFactory.class, "s3"),
      new NamedType(EventReceiverFirehoseFactory.class, "receiver")
  );

  return Arrays.<com.fasterxml.jackson.databind.Module>asList(firehoseModule);
}
}
);
}

View File

@ -19,6 +19,8 @@
package io.druid.cli;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
@ -28,6 +30,10 @@ import com.google.inject.Module;
import com.google.inject.multibindings.MapBinder;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.common.logger.Logger;
import druid.examples.flights.FlightsFirehoseFactory;
import druid.examples.rand.RandomFirehoseFactory;
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
import druid.examples.web.WebFirehoseFactory;
import io.airlift.command.Arguments;
import io.airlift.command.Command;
import io.airlift.command.Option;
@ -45,18 +51,26 @@ import io.druid.indexing.common.actions.RemoteTaskActionClientFactory;
import io.druid.indexing.common.actions.TaskActionClientFactory;
import io.druid.indexing.common.config.TaskConfig;
import io.druid.indexing.common.index.ChatHandlerProvider;
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
import io.druid.indexing.common.index.EventReceivingChatHandlerProvider;
import io.druid.indexing.common.index.NoopChatHandlerProvider;
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
import io.druid.indexing.coordinator.TaskRunner;
import io.druid.indexing.coordinator.ThreadPoolTaskRunner;
import io.druid.indexing.worker.executor.ChatHandlerResource;
import io.druid.indexing.worker.executor.ExecutorLifecycle;
import io.druid.indexing.worker.executor.ExecutorLifecycleConfig;
import io.druid.initialization.DruidModule;
import io.druid.query.QuerySegmentWalker;
import io.druid.segment.loading.DataSegmentKiller;
import io.druid.segment.loading.S3DataSegmentKiller;
import io.druid.segment.loading.SegmentLoaderConfig;
import io.druid.segment.loading.StorageLocationConfig;
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import io.druid.server.initialization.JettyServerInitializer;
import org.eclipse.jetty.server.Server;
@ -90,7 +104,7 @@ public class CliPeon extends GuiceRunnable
protected List<Object> getModules()
{
return ImmutableList.<Object>of(
new Module()
new DruidModule()
{
@Override
public void configure(Binder binder)
@ -143,6 +157,27 @@ public class CliPeon extends GuiceRunnable
LifecycleModule.register(binder, Server.class);
}
/**
 * Returns the Jackson modules this Druid module contributes.
 *
 * @return a list containing one {@code SimpleModule} named "RealtimeModule" that carries the
 *         named-subtype registrations for the firehose factory classes listed below
 */
@Override
public List<? extends com.fasterxml.jackson.databind.Module> getJacksonModules()
{
  final SimpleModule subtypeModule = new SimpleModule("RealtimeModule");

  // Register every firehose factory class together with its type name.
  subtypeModule.registerSubtypes(
      new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
      new NamedType(FlightsFirehoseFactory.class, "flights"),
      new NamedType(RandomFirehoseFactory.class, "rand"),
      new NamedType(WebFirehoseFactory.class, "webstream"),
      new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
      new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
      new NamedType(ClippedFirehoseFactory.class, "clipped"),
      new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
      new NamedType(IrcFirehoseFactory.class, "irc"),
      new NamedType(StaticS3FirehoseFactory.class, "s3"),
      new NamedType(EventReceiverFirehoseFactory.class, "receiver")
  );

  return Arrays.<com.fasterxml.jackson.databind.Module>asList(subtypeModule);
}
}
);
}

View File

@ -35,9 +35,16 @@ import io.druid.client.InventoryView;
import io.druid.client.ServerView;
import io.druid.guice.NoopSegmentPublisherProvider;
import io.druid.guice.RealtimeModule;
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
import io.druid.initialization.DruidModule;
import io.druid.segment.loading.DataSegmentPusher;
import io.druid.segment.realtime.SegmentPublisher;
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import io.druid.server.coordination.DataSegmentAnnouncer;
import io.druid.timeline.DataSegment;
@ -88,7 +95,14 @@ public class CliRealtimeExample extends ServerRunnable
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
new NamedType(FlightsFirehoseFactory.class, "flights"),
new NamedType(RandomFirehoseFactory.class, "rand"),
new NamedType(WebFirehoseFactory.class, "webstream")
new NamedType(WebFirehoseFactory.class, "webstream"),
new NamedType(KafkaFirehoseFactory.class, "kafka"),
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
new NamedType(ClippedFirehoseFactory.class, "clipped"),
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
new NamedType(IrcFirehoseFactory.class, "irc"),
new NamedType(StaticS3FirehoseFactory.class, "s3"),
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
)
);
}

View File

@ -85,6 +85,8 @@ public class ConvertProperties implements Runnable
new Rename("druid.indexer.terminateResources.duration", "druid.indexer.autoscale.terminatePeriod"),
new Rename("druid.indexer.terminateResources.originDateTime", "druid.indexer.autoscale.originTime"),
new Rename("druid.indexer.autoscaling.strategy", "druid.indexer.autoscale.strategy"),
new Rename("druid.indexer.logs.s3bucket", "druid.indexer.logs.s3Bucket"),
new Rename("druid.indexer.logs.s3prefix", "druid.indexer.logs.s3Prefix"),
new Rename("druid.indexer.maxWorkerIdleTimeMillisBeforeDeletion", "druid.indexer.autoscale.workerIdleTimeout"),
new Rename("druid.indexer.maxScalingDuration", "druid.indexer.autoscale.scalingTimeout"),
new Rename("druid.indexer.numEventsToTrack", "druid.indexer.autoscale.numEventsToTrack"),
@ -122,7 +124,7 @@ public class ConvertProperties implements Runnable
}
File outFile = new File(outFilename);
if (!outFile.getParentFile().exists()) {
if (outFile.getParentFile() != null && !outFile.getParentFile().exists()) {
outFile.getParentFile().mkdirs();
}
@ -144,8 +146,10 @@ public class ConvertProperties implements Runnable
for (PropertyConverter converter : converters) {
if (converter.canHandle(property)) {
for (Map.Entry<String, String> entry : converter.convert(fromFile).entrySet()) {
++count;
updatedProps.setProperty(entry.getKey(), entry.getValue());
if (entry.getValue() != null) {
++count;
updatedProps.setProperty(entry.getKey(), entry.getValue());
}
}
handled = true;
}

View File

@ -49,6 +49,11 @@ public class Rename implements PropertyConverter
/**
 * Carries the value stored under the old property name over to the new property name.
 *
 * @param properties the properties to read the old key's value from
 * @return a one-entry map from the new property name to the old property's value, or an empty
 *         map when the old property is not present in {@code properties}
 */
@Override
public Map<String, String> convert(Properties properties)
{
  final String value = properties.getProperty(property);

  // ImmutableMap rejects null values, so a missing property must produce an empty map
  // rather than being handed to ImmutableMap.of(key, null).
  if (value == null) {
    return ImmutableMap.of();
  }
  return ImmutableMap.of(newProperty, value);
}
}

View File

@ -26,7 +26,13 @@ import com.google.inject.Binder;
import com.google.inject.Key;
import com.google.inject.TypeLiteral;
import com.google.inject.multibindings.MapBinder;
import druid.examples.flights.FlightsFirehoseFactory;
import druid.examples.rand.RandomFirehoseFactory;
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
import druid.examples.web.WebFirehoseFactory;
import io.druid.cli.QueryJettyServerInitializer;
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
import io.druid.initialization.DruidModule;
import io.druid.query.QuerySegmentWalker;
import io.druid.segment.realtime.DbSegmentPublisher;
@ -34,7 +40,11 @@ import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.NoopSegmentPublisher;
import io.druid.segment.realtime.RealtimeManager;
import io.druid.segment.realtime.SegmentPublisher;
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
import io.druid.server.initialization.JettyServerInitializer;
import org.eclipse.jetty.server.Server;
@ -80,7 +90,17 @@ public class RealtimeModule implements DruidModule
return Arrays.<Module>asList(
new SimpleModule("RealtimeModule")
.registerSubtypes(
new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2")
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
new NamedType(FlightsFirehoseFactory.class, "flights"),
new NamedType(RandomFirehoseFactory.class, "rand"),
new NamedType(WebFirehoseFactory.class, "webstream"),
new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
new NamedType(ClippedFirehoseFactory.class, "clipped"),
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
new NamedType(IrcFirehoseFactory.class, "irc"),
new NamedType(StaticS3FirehoseFactory.class, "s3"),
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
)
);
}