Mirror of https://github.com/apache/druid.git

commit 4a7a28a7f6: Merge branch 'master' into fix-config
@@ -2,4 +2,4 @@ name: Your New Jekyll Site
 pygments: true
 markdown: redcarpet
 redcarpet:
-  extensions: ["no_intra_emphasis", "fenced_code_blocks", "autolink", "tables", "with_toc_data"]
+  extensions: ["no_intra_emphasis", "fenced_code_blocks", "disable_indented_code_blocks", "tables", "with_toc_data"]
@@ -0,0 +1,51 @@
+
+<!-- Start page_footer include -->
+<div class="container">
+<footer>
+<div class="container">
+<hr>
+<div class="row">
+<div class="col-md-4">
+<address>
+<strong>CONTACT US</strong>
+<a href="mailto:info@druid.io">info@druid.io</a>
+</address>
+<address>
+<strong>Metamarkets</strong>
+625 2nd Street, Suite #230<br>
+San Francisco, CA 94017<br>
+<div class="soc">
+<a href="https://twitter.com/druidio"></a>
+<a href="https://github.com/metamx/druid" class="github"></a>
+<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
+<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
+</div>
+</div>
+<ul class="col-md-4 list-unstyled">
+<li><a href="/"><strong>DRUID</strong></a></li>
+<li><a href="/druid.html">What is Druid?</a></li>
+<li><a href="/downloads.html">Downloads</a></li>
+<li><a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation</a></li>
+</ul>
+<ul class="col-md-4 list-unstyled">
+<li><a href="/community.html"><strong>SUPPORT</strong></a></li>
+<li><a href="/community.html">Community</a></li>
+<li><a href="/faq.html">FAQ</a></li>
+<li><a href="/licensing.html">Licensing</a></li>
+<li><a href="/blog"><strong>BLOG</strong></a></li>
+</ul>
+</div>
+</div>
+</footer>
+</div>
+<script type="text/javascript">
+var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+</script>
+<script type="text/javascript">
+try {
+var pageTracker = _gat._getTracker("UA-40280432-1");
+pageTracker._trackPageview();
+} catch(err) {}
+</script>
+<!-- stop page_footer include -->
@@ -0,0 +1,27 @@
+
+<!-- Start page_header include -->
+<div class="navbar navbar-inverse navbar-static-top">
+<div class="container druid-navbar">
+<div class="navbar-header">
+<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+<span class="icon-bar"></span>
+<span class="icon-bar"></span>
+<span class="icon-bar"></span>
+</button>
+<a class="navbar-brand" href="/">Druid</a>
+</div>
+<div class="navbar-collapse collapse">
+<ul class="nav navbar-nav">
+<li {% if page.id == 'home' %} class="active"{% endif %}><a href="/">Home</a></li>
+<li {% if page.sectionid == 'druid' %} class="active"{% endif %}><a href="/druid.html">What is Druid?</a></li>
+<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}><a href="/downloads.html">Downloads</a></li>
+<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
+<li {% if page.sectionid == 'community' %} class="active"{% endif %}><a href="/community.html">Community</a></li>
+<li {% if page.sectionid == 'faq' %} class="active"{% endif %}><a href="/faq.html">FAQ</a></li>
+<li {% if page.sectionid == 'blog' %} class="active"{% endif %}><a href="/blog">Blog</a></li>
+<li class="divider"></li>
+</ul>
+</div>
+</div>
+</div>
+<!-- Stop page_header include --->
@@ -0,0 +1,19 @@
+<meta charset="UTF-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta name="description" content="">
+<meta name="author" content="druid">
+
+<title>Druid | {{page.title}}</title>
+
+<!-- Latest compiled and minified CSS -->
+<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.css">
+
+<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
+<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
+
+<link rel="stylesheet" href="//druid.io/css/main.css">
+<link rel="stylesheet" href="//druid.io/css/header.css">
+<link rel="stylesheet" href="//druid.io/css/footer.css">
+<link rel="stylesheet" href="//druid.io/css/syntax.css">
+
+
@@ -1,147 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="UTF-8" />
-<title>Druid | {{page.title}}</title>
-<link rel="stylesheet" type="text/css" href="/css/bootstrap.css" media="all" />
-<link rel="stylesheet" type="text/css" href="/css/bootstrap-responsive.css" media="all" />
-<link rel="stylesheet" type="text/css" href="/css/syntax.css" media="all" />
-<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
-<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
-<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
-<script src="http://code.jquery.com/jquery.js"></script>
-<script src="/js/bootstrap.min.js"></script>
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
-<div class="wrapper">
-<header{% if page.id == 'home' %} class="index-head"{% endif %}>
-<div class="container custom">
-<div class="row-fluid">
-<div class="span12">
-<div class="navbar navbar-inverse custom">
-<div class="navbar-inner">
-<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
-<span class="icon-bar"></span>
-<span class="icon-bar"></span>
-<span class="icon-bar"></span>
-</button>
-<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
-<div class="nav-collapse collapse">
-<ul class="nav">
-<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
-<a href="/druid.html">What is Druid?</a>
-</li>
-<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
-<a href="/downloads.html">Downloads</a>
-</li>
-<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
-<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
-</li>
-<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
-<a href="/community.html">Community</a>
-</li>
-<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
-<a href="/faq.html">FAQ</a>
-</li>
-<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
-<a href="/blog">Blog</a>
-</li>
-<li class="pull-right">
-<span>BETA</span>
-</li>
-</ul>
-</div>
-</div>
-</div>
-</div>
-</div>
-{% if page.id == 'home' %}
-<h3>Druid is open-source infrastructure for real²time exploratory analytics on large datasets.</h3>
-<button class="btn" type="button"><a href="downloads.html">Download</a></button>
-{% endif %}
-</div>
-</header>
-<div class="container custom main-cont">
-
-{{ content }}
-
-</div>
-</div>
-<footer>
-<div class="container custom">
-<div class="row-fluid">
-<div class="span3">
-<div class="contact-item">
-<span>CONTACT US</span>
-<a href="mailto:info@druid.io">info@druid.io</a>
-</div>
-<div class="contact-item">
-<span>Metamarkets</span>
-625 2nd Street, Suite #230<br/>
-San Francisco, CA 94017
-<div class="soc">
-<a href="https://twitter.com/druidio"></a>
-<a href="https://github.com/metamx/druid" class="github"></a>
-<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
-<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
-</div>
-</div>
-</div>
-<div class="span9">
-<ul class="unstyled">
-<li>
-<a href="/">DRUID</a>
-</li>
-<li>
-<a href="/druid.html">What is Druid?</a>
-</li>
-<li>
-<a href="/downloads.html">Downloads</a>
-</li>
-<li>
-<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
-</li>
-</ul>
-<ul class="unstyled">
-<li>
-<a href="/community.html">SUPPORT</a>
-</li>
-<li>
-<a href="/community.html">Community</a>
-</li>
-<li>
-<a href="/faq.html">FAQ</a>
-</li>
-<li>
-<a href="/licensing.html">Licensing</a>
-</li>
-</ul>
-<ul class="unstyled">
-<li>
-<a href="/blog">BLOG</a>
-</li>
-</ul>
-<div class="logo-block">
-<span class="logo custom">
-<a href="/"></a>
-</span>
-<p>is an open source project sponsored by<br/> Metamarkets.</p>
-</div>
-</div>
-</div>
-</div>
-</footer>
-<script type="text/javascript">
-var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
-document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
-</script>
-<script type="text/javascript">
-try {
-var pageTracker = _gat._getTracker("UA-40280432-1");
-pageTracker._trackPageview();
-} catch(err) {}
-</script>
-</body>
-</html>
-
@@ -0,0 +1,36 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+{% include site_head.html %}
+
+<link rel="stylesheet" href="css/docs.css">
+</head>
+<body>
+{% include page_header.html %}
+
+<div class="container">
+<div class="page-header">
+<h1>Documentation</h1>
+</div>
+
+<div class="row">
+<div class="col-md-3 toc" id="toc">
+</div>
+
+<div class="col-md-9 doc-content">
+{{ content }}
+</div>
+</div>
+</div>
+
+{% include page_footer.html %}
+<script src="http://code.jquery.com/jquery.js"></script>
+<script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
+<script>
+$(function(){
+$("#toc").load("toc.html");
+});
+</script>
+</body>
+</html>
+
@@ -1,8 +0,0 @@
----
-layout: default
----
-<div class="row-fluid">
-
-{{ content }}
-
-</div>
@@ -1,147 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="UTF-8" />
-<title>Druid | {{page.title}}</title>
-<link rel="stylesheet" type="text/css" href="css/bootstrap.css" media="all" />
-<link rel="stylesheet" type="text/css" href="css/bootstrap-responsive.css" media="all" />
-<link rel="stylesheet" type="text/css" href="css/syntax.css" media="all" />
-<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
-<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
-<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
-<script src="http://code.jquery.com/jquery.js"></script>
-<script src="/js/bootstrap.min.js"></script>
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
-<div class="wrapper">
-<header{% if page.id == 'home' %} class="index-head"{% endif %}>
-<div class="container custom">
-<div class="row-fluid">
-<div class="span12">
-<div class="navbar navbar-inverse custom">
-<div class="navbar-inner">
-<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
-<span class="icon-bar"></span>
-<span class="icon-bar"></span>
-<span class="icon-bar"></span>
-</button>
-<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
-<div class="nav-collapse collapse">
-<ul class="nav">
-<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
-<a href="/druid.html">What is Druid?</a>
-</li>
-<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
-<a href="/downloads.html">Downloads</a>
-</li>
-<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
-<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
-</li>
-<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
-<a href="/community.html">Community</a>
-</li>
-<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
-<a href="/faq.html">FAQ</a>
-</li>
-<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
-<a href="/blog">Blog</a>
-</li>
-<li class="pull-right">
-<span>BETA</span>
-</li>
-</ul>
-</div>
-</div>
-</div>
-</div>
-</div>
-{% if page.id == 'home' %}
-<h3>Druid is open-source infrastructure for real²time exploratory analytics on large datasets.</h3>
-<button class="btn" type="button"><a href="downloads.html">Download</a></button>
-{% endif %}
-</div>
-</header>
-<div class="container custom main-cont">
-
-{{ content }}
-
-</div>
-</div>
-<footer>
-<div class="container custom">
-<div class="row-fluid">
-<div class="span3">
-<div class="contact-item">
-<span>CONTACT US</span>
-<a href="mailto:info@druid.io">info@druid.io</a>
-</div>
-<div class="contact-item">
-<span>Metamarkets</span>
-625 2nd Street, Suite #230<br/>
-San Francisco, CA 94017
-<div class="soc">
-<a href="https://twitter.com/druidio"></a>
-<a href="https://github.com/metamx/druid" class="github"></a>
-<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
-<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
-</div>
-</div>
-</div>
-<div class="span9">
-<ul class="unstyled">
-<li>
-<a href="/">DRUID</a>
-</li>
-<li>
-<a href="/druid.html">What is Druid?</a>
-</li>
-<li>
-<a href="/downloads.html">Downloads</a>
-</li>
-<li>
-<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
-</li>
-</ul>
-<ul class="unstyled">
-<li>
-<a href="/community.html">SUPPORT</a>
-</li>
-<li>
-<a href="/community.html">Community</a>
-</li>
-<li>
-<a href="/faq.html">FAQ</a>
-</li>
-<li>
-<a href="/licensing.html">Licensing</a>
-</li>
-</ul>
-<ul class="unstyled">
-<li>
-<a href="/blog">BLOG</a>
-</li>
-</ul>
-<div class="logo-block">
-<span class="logo custom">
-<a href="/"></a>
-</span>
-<p>is an open source project sponsored by<br/> Metamarkets.</p>
-</div>
-</div>
-</div>
-</div>
-</footer>
-<script type="text/javascript">
-var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
-document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
-</script>
-<script type="text/javascript">
-try {
-var pageTracker = _gat._getTracker("UA-40280432-1");
-pageTracker._trackPageview();
-} catch(err) {}
-</script>
-</body>
-</html>
-
@@ -1,11 +0,0 @@
----
-layout: default
----
-
-<div class="row-fluid">
-<div class="span10 offset1{% if page.id != 'home' %} simple-page{% endif %}{% if page.sectionid == 'faq' %} faq-page{% endif %}">
-
-{{ content }}
-
-</div>
-</div>
@@ -1,44 +0,0 @@
----
-layout: default
-sectionid: blog
----
-
-<div class="row-fluid">
-<div class="span4 recent">
-<h3>Recent posts</h3>
-<ul class="unstyled">
-{% for post in site.posts limit: 5 %}
-<li{% if page.title == post.title %} class="active"{% endif %}><a href="{{ post.url }}">{{ post.title }}</a></li>
-{% endfor %}
-</ul>
-</div>
-
-<div class="span8 simple-page">
-<div class="text-item blog inner">
-<h2 class="date">
-<span>{{ page.title }}</span>
-<span>{{ page.date | date: "%B %e, %Y" }} · {{ page.author | upcase }}</span>
-</h2>
-
-{% if page.image %}<img src="{{ page.image }}" alt="{{ page.title }}" class="text-img" />{% endif %}
-
-{{ content }}
-
-<div id="disqus_thread"></div>
-<script type="text/javascript">
-/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
-var disqus_shortname = 'druidio'; // required: replace example with your forum shortname
-
-/* * * DON'T EDIT BELOW THIS LINE * * */
-(function() {
-var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
-dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
-(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
-})();
-</script>
-<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
-<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-
-</div>
-</div>
-</div>
@@ -1,20 +1,26 @@
 ---
-layout: docs_default
+layout: doc_page
 ---
 Aggregations are specifications of processing over metrics available in Druid.
 Available aggregations are:

+### Count aggregator
+
+`count` computes the row count that match the filters
+
+```json
+{ "type" : "count", "name" : <output_name> }
+```
+
 ### Sum aggregators

 #### `longSum` aggregator

 computes the sum of values as a 64-bit, signed integer

-<code>{
-"type" : "longSum",
-"name" : <output_name>,
-"fieldName" : <metric_name>
-}</code>
+```json
+{ "type" : "longSum", "name" : <output_name>, "fieldName" : <metric_name> }
+```

 `name` – output name for the summed value
 `fieldName` – name of the metric column to sum over
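For a sense of how these placeholder fields get filled in, the batch-ingestion specFile later in this commit uses both of these aggregator types with concrete names (one aggregator spec per line):

```json
{ "type" : "count", "name" : "event_count" }
{ "type" : "longSum", "name" : "clicks", "fieldName" : "column_5" }
```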
@@ -23,20 +29,9 @@ computes the sum of values as a 64-bit, signed integer

 Computes the sum of values as 64-bit floating point value. Similar to `longSum`

-<code>{
-"type" : "doubleSum",
-"name" : <output_name>,
-"fieldName" : <metric_name>
-}</code>
-
-### Count aggregator
-
-`count` computes the row count that match the filters
-
-<code>{
-"type" : "count",
-"name" : <output_name>,
-}</code>
+```json
+{ "type" : "doubleSum", "name" : <output_name>, "fieldName" : <metric_name> }
+```

 ### Min / Max aggregators

@@ -44,21 +39,17 @@ Computes the sum of values as 64-bit floating point value. Similar to `longSum`

 `min` computes the minimum metric value

-<code>{
-"type" : "min",
-"name" : <output_name>,
-"fieldName" : <metric_name>
-}</code>
+```json
+{ "type" : "min", "name" : <output_name>, "fieldName" : <metric_name> }
+```

 #### `max` aggregator

 `max` computes the maximum metric value

-<code>{
-"type" : "max",
-"name" : <output_name>,
-"fieldName" : <metric_name>
-}</code>
+```json
+{ "type" : "max", "name" : <output_name>, "fieldName" : <metric_name> }
+```

 ### JavaScript aggregator

@@ -66,25 +57,27 @@ Computes an arbitrary JavaScript function over a set of columns (both metrics an

 All JavaScript functions must return numerical values.

-<code>{
-"type": "javascript",
-"name": "<output_name>",
-"fieldNames" : [ <column1>, <column2>, ... ],
-"fnAggregate" : "function(current, column1, column2, ...) {
-<updates partial aggregate (current) based on the current row values>
-return <updated partial aggregate>
-}"
-"fnCombine" : "function(partialA, partialB) { return <combined partial results>; }"
-"fnReset" : "function() { return <initial value>; }"
-}</code>
+```json
+{ "type": "javascript", "name": "<output_name>",
+"fieldNames" : [ <column1>, <column2>, ... ],
+"fnAggregate" : "function(current, column1, column2, ...) {
+<updates partial aggregate (current) based on the current row values>
+return <updated partial aggregate>
+}",
+"fnCombine" : "function(partialA, partialB) { return <combined partial results>; }",
+"fnReset" : "function() { return <initial value>; }"
+}
+```

 **Example**

-<code>{
-"type": "javascript",
-"name": "sum(log(x)/y) + 10",
-"fieldNames": ["x", "y"],
-"fnAggregate" : "function(current, a, b) { return current + (Math.log(a) * b); }"
-"fnCombine" : "function(partialA, partialB) { return partialA + partialB; }"
-"fnReset" : "function() { return 10; }"
-}</code>
+```json
+{
+"type": "javascript",
+"name": "sum(log(x)/y) + 10",
+"fieldNames": ["x", "y"],
+"fnAggregate" : "function(current, a, b) { return current + (Math.log(a) * b); }",
+"fnCombine" : "function(partialA, partialB) { return partialA + partialB; }",
+"fnReset" : "function() { return 10; }"
+}
+```
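Aggregator specs like the ones above are not submitted on their own; they appear in a list, such as the `aggs` list of the batch rollupSpec later in this commit or the `aggregations` list of a query (the query usage is an assumption based on the Druid query documentation of this era, not something shown in this diff). A minimal sketch of such a list, reusing names from the batch specFile, would be:

```json
"aggregations" : [
  { "type" : "count", "name" : "event_count" },
  { "type" : "doubleSum", "name" : "revenue", "fieldName" : "column_4" },
  { "type" : "longSum", "name" : "clicks", "fieldName" : "column_5" }
]
```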
@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 Batch Data Ingestion
 ====================
@@ -18,52 +18,52 @@ HadoopDruidIndexer

 Located at `com.metamx.druid.indexer.HadoopDruidIndexerMain` can be run like

-<code>
+```
 java -cp hadoop_config_path:druid_indexer_selfcontained_jar_path com.metamx.druid.indexer.HadoopDruidIndexerMain <config_file>
-</code>
+```

-The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals) of the data you are processing. The config\_file is a path to a file (the “specFile”) that contains JSON and an example looks like:
+The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals) of the data you are processing. The config\_file is a path to a file (the "specFile") that contains JSON and an example looks like:

-<code>
+```
 {
 "dataSource": "the_data_source",
 "timestampColumn": "ts",
 "timestampFormat": "<iso, millis, posix, auto or any Joda time format>",
 "dataSpec": {
 "format": "<csv, tsv, or json>",
 "columns": ["ts", "column_1", "column_2", "column_3", "column_4", "column_5"],
 "dimensions": ["column_1", "column_2", "column_3"]
 },
 "granularitySpec": {
 "type":"uniform",
 "intervals":["<ISO8601 interval:http://en.wikipedia.org/wiki/ISO_8601#Time_intervals>"],
 "gran":"day"
 },
 "pathSpec": { "type": "granularity",
 "dataGranularity": "hour",
 "inputPath": "s3n://billy-bucket/the/data/is/here",
 "filePattern": ".*" },
 "rollupSpec": { "aggs": [
 { "type": "count", "name":"event_count" },
 { "type": "doubleSum", "fieldName": "column_4", "name": "revenue" },
 { "type": "longSum", "fieldName" : "column_5", "name": "clicks" }
 ],
 "rollupGranularity": "minute"},
 "workingPath": "/tmp/path/on/hdfs",
 "segmentOutputPath": "s3n://billy-bucket/the/segments/go/here",
 "leaveIntermediate": "false",
 "partitionsSpec": {
 "targetPartitionSize": 5000000
 },
 "updaterJobSpec": {
 "type":"db",
 "connectURI":"jdbc:mysql://localhost:7980/test_db",
 "user":"username",
 "password":"passmeup",
 "segmentTable":"segments"
 }
 }
-</code>
+```

 ### Hadoop indexer config

@@ -100,10 +100,12 @@ Is a type of data loader that expects data to be laid out in a specific path for

 For example, if the sample config were run with the interval 2012-06-01/2012-06-02, it would expect data at the paths

-s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=00
-s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=01
-...
-s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=23
+```
+s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=00
+s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=01
+...
+s3n://billy-bucket/the/data/is/here/y=2012/m=06/d=01/H=23
+```

 ### Rollup specification

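The directory layout above is what the `pathSpec` block of the sample specFile earlier in this file describes; pulled out on its own, that block is:

```json
"pathSpec": {
  "type": "granularity",
  "dataGranularity": "hour",
  "inputPath": "s3n://billy-bucket/the/data/is/here",
  "filePattern": ".*"
}
```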
@@ -116,7 +118,7 @@ The indexing process has the ability to roll data up as it processes the incomin

 ### Partitioning specification

-Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension “last\_name” into two segments: one with all values from A-M and one with all values from N-Z.
+Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.

 To use this option, the indexer must be given a target partition size. It can then find a good set of partition ranges on its own.

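The only partitioning setting that appears in the sample specFile above is the target size; the corresponding block is simply:

```json
"partitionsSpec": {
  "targetPartitionSize": 5000000
}
```

Any further knobs (such as naming an explicit partition dimension) are not shown in this commit.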
@@ -132,7 +134,7 @@ This is a specification of the properties that tell the job how to update metada

 |property|description|required?|
 |--------|-----------|---------|
-|type|“db” is the only value available|yes|
+|type|"db" is the only value available|yes|
 |connectURI|a valid JDBC url to MySQL|yes|
 |user|username for db|yes|
 |password|password for db|yes|
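The properties in this table map directly onto the `updaterJobSpec` block of the sample specFile above:

```json
"updaterJobSpec": {
  "type":"db",
  "connectURI":"jdbc:mysql://localhost:7980/test_db",
  "user":"username",
  "password":"passmeup",
  "segmentTable":"segments"
}
```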
@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 # Booting a Single Node Cluster #

@@ -27,4 +27,4 @@ cd whirr
 git checkout trunk
 mvn clean install -Dmaven.test.failure.ignore=true -Dcheckstyle.skip
 bin/whirr launch-cluster --config recipes/druid.properties
 ```
@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 Broker
 ======
@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 ### Clone and Build from Source

@@ -20,5 +20,6 @@ LICENSE client eclipse_formatting.xml index-common merger realtime
 ```

 You can find the example executables in the examples/bin directory:
+
 * run_example_server.sh
 * run_example_client.sh
@@ -1,114 +0,0 @@
----
-layout: default
----
-A Druid cluster consists of various node types that need to be set up depending on your use case. See our [Design](Design.html) docs for a description of the different node types.
-
-Setup Scripts
--------------
-
-One of our community members, [housejester](https://github.com/housejester/), contributed some scripts to help with setting up a cluster. Checkout the [github](https://github.com/housejester/druid-test-harness) and [wiki](https://github.com/housejester/druid-test-harness/wiki/Druid-Test-Harness).
-
-Minimum Physical Layout: Absolute Minimum
------------------------------------------
-
-As a special case, the absolute minimum setup is one of the standalone examples for realtime ingestion and querying; see [Examples](Examples.html) that can easily run on one machine with one core and 1GB RAM. This layout can be set up to try some basic queries with Druid.
-
-Minimum Physical Layout: Experimental Testing with 4GB of RAM
--------------------------------------------------------------
-
-This layout can be used to load some data from deep storage onto a Druid compute node for the first time. A minimal physical layout for a 1 or 2 core machine with 4GB of RAM is:
-
-1. node1: [Master](Master.html) + metadata service + zookeeper + [Compute](Compute.html)
-2. transient nodes: indexer
-
-This setup is only reasonable to prove that a configuration works. It would not be worthwhile to use this layout for performance measurement.
-
-Comfortable Physical Layout: Pilot Project with Multiple Machines
------------------------------------------------------------------
-
-*The machine size “flavors” are using AWS/EC2 terminology for descriptive purposes only and is not meant to imply that AWS/EC2 is required or recommended. Another cloud provider or your own hardware can also work.*
-
-A minimal physical layout not constrained by cores that demonstrates parallel querying and realtime, using AWS-EC2 “small”/m1.small (one core, with 1.7GB of RAM) or larger, no realtime, is:
-
-1. node1: [Master](Master.html) (m1.small)
-2. node2: metadata service (m1.small)
-3. node3: zookeeper (m1.small)
-4. node4: [Broker](Broker.html) (m1.small or m1.medium or m1.large)
-5. node5: [Compute](Compute.html) (m1.small or m1.medium or m1.large)
-6. node6: [Compute](Compute.html) (m1.small or m1.medium or m1.large)
-7. node7: [Realtime](Realtime.html) (m1.small or m1.medium or m1.large)
-8. transient nodes: indexer
-
-This layout naturally lends itself to adding more RAM and core to Compute nodes, and to adding many more Compute nodes. Depending on the actual load, the Master, metadata server, and Zookeeper might need to use larger machines.
-
-High Availability Physical Layout
----------------------------------
-
-*The machine size “flavors” are using AWS/EC2 terminology for descriptive purposes only and is not meant to imply that AWS/EC2 is required or recommended. Another cloud provider or your own hardware can also work.*
-
-An HA layout allows full rolling restarts and heavy volume:
-
-1. node1: [Master](Master.html) (m1.small or m1.medium or m1.large)
-2. node2: [Master](Master.html) (m1.small or m1.medium or m1.large) (backup)
-3. node3: metadata service (c1.medium or m1.large)
-4. node4: metadata service (c1.medium or m1.large) (backup)
-5. node5: zookeeper (c1.medium)
-6. node6: zookeeper (c1.medium)
-7. node7: zookeeper (c1.medium)
-8. node8: [Broker](Broker.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
-9. node9: [Broker](Broker.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge) (backup)
-10. node10: [Compute](Compute.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
-11. node11: [Compute](Compute.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
-12. node12: [Realtime](Realtime.html) (m1.small or m1.medium or m1.large or m2.xlarge or m2.2xlarge or m2.4xlarge)
-13. transient nodes: indexer
-
-Sizing for Cores and RAM
-------------------------
-
-The Compute and Broker nodes will use as many cores as are available, depending on usage, so it is best to keep these on dedicated machines. The upper limit of effectively utilized cores is not well characterized yet and would depend on types of queries, query load, and the schema. Compute daemons should have a heap size of at least 1GB per core for normal usage, but could be squeezed into a smaller heap for testing. Since in-memory caching is essential for good performance, even more RAM is better. Broker nodes will use RAM for caching, so they do more than just route queries.
-
-The effective utilization of cores by Zookeeper, MySQL, and Master nodes is likely to be between 1 and 2 for each process/daemon, so these could potentially share a machine with lots of cores. These daemons work with a heap size between 500MB and 1GB.
-
-Storage
--------
-
-Indexed segments should be kept in a permanent store accessible by all nodes like AWS S3 or HDFS or equivalent. Currently Druid supports S3, but this will be extended soon.
-
-Local disk (“ephemeral” on AWS EC2) for caching is recommended over network mounted storage (example of mounted: AWS EBS, Elastic Block Store) in order to avoid network delays during times of heavy usage. If your data center is suitably provisioned for networked storage, perhaps with separate LAN/NICs just for storage, then mounted might work fine.
-
-Setup
------
-
-Setting up a cluster is essentially just firing up all of the nodes you want with the proper [configuration](configuration.html). One thing to be aware of is that there are a few properties in the configuration that potentially need to be set individually for each process:
-
-<code>
-druid.server.type=historical|realtime
-druid.host=someHostOrIPaddrWithPort
-druid.port=8080
-</code>
-
-`druid.server.type` should be set to “historical” for your compute nodes and realtime for the realtime nodes. The master will only assign segments to a “historical” node and the broker has some intelligence around its ability to cache results when talking to a realtime node. This does not need to be set for the master or the broker.
-
-`druid.host` should be set to the hostname and port that can be used to talk to the given server process. Basically, someone should be able to send a request to http://\${druid.host}/ and actually talk to the process.
-
-`druid.port` should be set to the port that the server should listen on. In the vast majority of cases, this port should be the same as what is on `druid.host`.
-
-Build/Run
----------
-
-The simplest way to build and run from the repository is to run `mvn package` from the base directory and then take `druid-services/target/druid-services-*-selfcontained.jar` and push that around to your machines; the jar does not need to be expanded, and since it contains the main() methods for each kind of service, it is **not** invoked with java -jar. It can be run from a normal java command-line by just including it on the classpath and then giving it the main class that you want to run. For example one instance of the Compute node/service can be started like this:
-
-<code>
-java -Duser.timezone=UTC -Dfile.encoding=UTF-8 -cp compute/:druid-services/target/druid-services-*-selfcontained.jar com.metamx.druid.http.ComputeMain
-</code>
-
-The following table shows the possible services and fully qualified class for main().
-
-|service|main class|
-|-------|----------|
-|[Realtime](Realtime.html)|com.metamx.druid.realtime.RealtimeMain|
-|[Master](Master.html)|com.metamx.druid.http.MasterMain|
-|[Broker](Broker.html)|com.metamx.druid.http.BrokerMain|
-|[Compute](Compute.html)|com.metamx.druid.http.ComputeMain|
@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 Compute
 =======
@@ -1,15 +1,14 @@
 ---
-layout: default
+layout: doc_page
 ---
 Concepts and Terminology
 ========================

-- **Aggregators:** A mechanism for combining records during realtime incremental indexing, Hadoop batch indexing, and in queries.
-- **DataSource:** A table-like view of data; specified in a “specFile” and in a query.
-- **Granularity:** The time interval corresponding to aggregation by time.
-- The *indexGranularity* setting in a schema is used to aggregate input (ingest) records within an interval into a single output (internal) record.
-- The *segmentGranularity* is the interval specifying how internal records are stored together in a single file.
-
-- **Segment:** A collection of (internal) records that are stored and processed together.
-- **Shard:** A unit of partitioning data across machine. TODO: clarify; by time or other dimensions?
-- **specFile** is specification for services in JSON format; see [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html)
+* **Aggregators**: A mechanism for combining records during realtime incremental indexing, Hadoop batch indexing, and in queries.
+* **DataSource**: A table-like view of data; specified in a "specFile" and in a query.
+* **Granularity**: The time interval corresponding to aggregation by time.
+* **indexGranularity**: specifies the granularity used to bucket timestamps within a segment.
+* **segmentGranularity**: specifies the granularity of the segment, i.e. the amount of time a segment will represent
+* **Segment**: A collection of (internal) records that are stored and processed together.
+* **Shard**: A sub-partition of the data in a segment. It is possible to have multiple segments represent all data for a given segmentGranularity.
+* **specFile**: is specification for services in JSON format; see [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html)
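To connect these terms to the specs elsewhere in this commit: in the batch-ingestion specFile, the `granularitySpec` controls how much time each segment covers (the segmentGranularity idea) while `rollupGranularity` controls the time bucket that input records are aggregated into (the indexGranularity idea). That reading is an interpretation of the docs above rather than something this diff states explicitly; an abridged excerpt of the relevant blocks is:

```json
"granularitySpec": {
  "type": "uniform",
  "intervals": ["<ISO8601 interval>"],
  "gran": "day"
},
"rollupSpec": {
  "aggs": [ { "type": "count", "name": "event_count" } ],
  "rollupGranularity": "minute"
}
```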
@@ -1,7 +1,7 @@
 ---
-layout: default
+layout: doc_page
 ---
-This describes the basic server configuration that is loaded by all the server processes; the same file is loaded by all. See also the json “specFile” descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).
+This describes the basic server configuration that is loaded by all the server processes; the same file is loaded by all. See also the json "specFile" descriptions in [Realtime](Realtime.html) and [Batch-ingestion](Batch-ingestion.html).

 JVM Configuration Best Practices
 ================================
@@ -17,63 +17,63 @@ Basic Service Configuration

 Configuration of the various nodes is done via Java properties. These can either be provided as `-D` system properties on the java command line or they can be passed in via a file called `runtime.properties` that exists on the classpath. Note: as a future item, I’d like to consolidate all of the various configuration into a yaml/JSON based configuration files.

-The periodic time intervals (like “PT1M”) are [ISO8601 intervals](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals)
+The periodic time intervals (like "PT1M") are [ISO8601 intervals](http://en.wikipedia.org/wiki/ISO_8601#Time_intervals)

 An example runtime.properties is as follows:

-<code>
+```
 # S3 access
 com.metamx.aws.accessKey=<S3 access key>
 com.metamx.aws.secretKey=<S3 secret_key>

 # thread pool size for servicing queries
 druid.client.http.connections=30

 # JDBC connection string for metadata database
 druid.database.connectURI=
 druid.database.user=user
 druid.database.password=password
 # time between polling for metadata database
 druid.database.poll.duration=PT1M
 druid.database.segmentTable=prod_segments

 # Path on local FS for storage of segments; dir will be created if needed
 druid.paths.indexCache=/tmp/druid/indexCache
 # Path on local FS for storage of segment metadata; dir will be created if needed
 druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache

 druid.request.logging.dir=/tmp/druid/log

 druid.server.maxSize=300000000000

 # ZK quorum IPs
 druid.zk.service.host=
 # ZK path prefix for Druid-usage of zookeeper, Druid will create multiple paths underneath this znode
 druid.zk.paths.base=/druid
 # ZK path for discovery, the only path not to default to anything
 druid.zk.paths.discoveryPath=/druid/discoveryPath

 # the host:port as advertised to clients
 druid.host=someHostOrIPaddrWithPort
 # the port on which to listen, this port should line up with the druid.host value
 druid.port=8080

 com.metamx.emitter.logging=true
 com.metamx.emitter.logging.level=debug

 druid.processing.formatString=processing_%s
 druid.processing.numThreads=3


 druid.computation.buffer.size=100000000

 # S3 dest for realtime indexer
 druid.pusher.s3.bucket=
 druid.pusher.s3.baseKey=

 druid.bard.cache.sizeInBytes=40000000
 druid.master.merger.service=blah_blah
-</code>
+```

 Configuration groupings
 -----------------------
@@ -91,7 +91,7 @@ These properties are for connecting with S3 and using it to pull down segments.

 ### JDBC connection

-These properties specify the jdbc connection and other configuration around the “segments table” database. The only processes that connect to the DB with these properties are the [Master](Master.html) and [Indexing service](Indexing-service.html). This is tested on MySQL.
+These properties specify the jdbc connection and other configuration around the "segments table" database. The only processes that connect to the DB with these properties are the [Master](Master.html) and [Indexing service](Indexing-service.html). This is tested on MySQL.

 |Property|Description|Default|
 |--------|-----------|-------|
@@ -142,7 +142,7 @@ These are properties that the compute nodes use
 |Property|Description|Default|
 |--------|-----------|-------|
 |`druid.server.maxSize`|The maximum number of bytes worth of segment that the node wants assigned to it. This is not a limit that the compute nodes actually enforce, they just publish it to the master and trust the master to do the right thing|none|
-|`druid.server.type`|Specifies the type of the node. This is published via ZK and depending on the value the node will be treated specially by the Master/Broker. Allowed values are “realtime” or “historical”. This is a configuration parameter because the plan is to allow for a more configurable cluster composition. At the current time, all realtime nodes should just be “realtime” and all compute nodes should just be “compute”|none|
+|`druid.server.type`|Specifies the type of the node. This is published via ZK and depending on the value the node will be treated specially by the Master/Broker. Allowed values are "realtime" or "historical". This is a configuration parameter because the plan is to allow for a more configurable cluster composition. At the current time, all realtime nodes should just be "realtime" and all compute nodes should just be "compute"|none|

 ### Emitter Properties

@@ -150,7 +150,7 @@ The Druid servers emit various metrics and alerts via something we call an [Emit

 |Property|Description|Default|
 |--------|-----------|-------|
-|`com.metamx.emitter.logging`|Set to “true” to use the logging emitter|none|
+|`com.metamx.emitter.logging`|Set to "true" to use the logging emitter|none|
 |`com.metamx.emitter.logging.level`|Sets the level to log at|debug|
 |`com.metamx.emitter.logging.class`|Sets the class to log at|com.metamx.emiter.core.LoggingEmitter|

@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 If you are interested in contributing to the code, we accept [pull requests](https://help.github.com/articles/using-pull-requests). Note: we have only just completed decoupling our Metamarkets-specific code from the code base and we took some short-cuts in interface design to make it happen. So, there are a number of interfaces that exist right now which are likely to be in flux. If you are embedding Druid in your system, it will be safest for the time being to only extend/implement interfaces that this wiki describes, as those are intended as stable (unless otherwise mentioned).

@@ -1,5 +1,5 @@
 ---
-layout: default
+layout: doc_page
 ---
 Deep storage is where segments are stored. It is a storage mechanism that Druid does not provide. This deep storage infrastructure defines the level of durability of your data, as long as Druid nodes can see this storage infrastructure and get at the segments stored on it, you will not lose data no matter how many Druid nodes you lose. If segments disappear from this storage layer, then you will lose whatever data those segments represented.

@@ -11,10 +11,12 @@ S3-compatible deep storage is basically either S3 or something like riak-cs whic

 S3 configuration parameters are

-com.metamx.aws.accessKey=<S3 access key>
-com.metamx.aws.secretKey=<S3 secret_key>
-druid.pusher.s3.bucket=<bucket to store in>
-druid.pusher.s3.baseKey=<base key prefix to use, i.e. what directory>
+```
+com.metamx.aws.accessKey=<S3 access key>
+com.metamx.aws.secretKey=<S3 secret_key>
+druid.pusher.s3.bucket=<bucket to store in>
+druid.pusher.s3.baseKey=<base key prefix to use, i.e. what directory>
+```

 ## HDFS

@ -22,8 +24,10 @@ As of 0.4.0, HDFS can be used for storage of segments as well.
|
||||||
|
|
||||||
In order to use HDFS for deep storage, you need to set the following configuration on your realtime nodes.
|
In order to use HDFS for deep storage, you need to set the following configuration on your realtime nodes.
|
||||||
|
|
||||||
druid.pusher.hdfs=true
|
```
|
||||||
druid.pusher.hdfs.storageDirectory=<directory for storing segments>
|
druid.pusher.hdfs=true
|
||||||
|
druid.pusher.hdfs.storageDirectory=<directory for storing segments>
|
||||||
|
```
|
||||||
|
|
||||||
If you are using the Hadoop indexer, set your output directory to be a location on Hadoop and it will work.
|
If you are using the Hadoop indexer, set your output directory to be a location on Hadoop and it will work.
|
||||||
|
|
||||||
|
@ -34,9 +38,11 @@ A local mount can be used for storage of segments as well. This allows you to u
|
||||||
|
|
||||||
In order to use a local mount for deep storage, you need to set the following configuration on your realtime nodes.
|
In order to use a local mount for deep storage, you need to set the following configuration on your realtime nodes.
|
||||||
|
|
||||||
druid.pusher.local=true
|
```
|
||||||
druid.pusher.local.storageDirectory=<directory for storing segments>
|
druid.pusher.local=true
|
||||||
|
druid.pusher.local.storageDirectory=<directory for storing segments>
|
||||||
|
```
|
||||||
|
|
||||||
Note that you should generally set `druid.pusher.local.storageDirectory` to something different from `druid.paths.indexCache`.
|
Note that you should generally set `druid.pusher.local.storageDirectory` to something different from `druid.paths.indexCache`.
|
||||||
|
|
||||||
If you are using the Hadoop indexer in local mode, then just give it a local file as your output directory and it will work.
|
If you are using the Hadoop indexer in local mode, then just give it a local file as your output directory and it will work.
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
|
|
||||||
For a comprehensive look at the architecture of Druid, read the [White Paper](http://static.druid.io/docs/druid.pdf).
|
For a comprehensive look at the architecture of Druid, read the [White Paper](http://static.druid.io/docs/druid.pdf).
|
||||||
|
|
||||||
What is Druid?
|
What is Druid?
|
||||||
==============
|
==============
|
||||||
|
|
||||||
Druid is a system built to allow fast (“real-time”) access to large sets of seldom-changing data. It was designed with the intent of being a service and maintaining 100% uptime in the face of code deployments, machine failures and other eventualities of a production system. It can be useful for back-office use cases as well, but design decisions were made explicitly targetting an always-up service.
|
Druid is a system built to allow fast ("real-time") access to large sets of seldom-changing data. It was designed with the intent of being a service and maintaining 100% uptime in the face of code deployments, machine failures and other eventualities of a production system. It can be useful for back-office use cases as well, but design decisions were made explicitly targeting an always-up service.
|
||||||
|
|
||||||
Druid currently allows for single-table queries in a similar manner to [Dremel](http://research.google.com/pubs/pub36632.html) and [PowerDrill](http://www.vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf). It adds to the mix
|
Druid currently allows for single-table queries in a similar manner to [Dremel](http://research.google.com/pubs/pub36632.html) and [PowerDrill](http://www.vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf). It adds to the mix
|
||||||
|
|
||||||
|
@ -18,20 +19,21 @@ Druid currently allows for single-table queries in a similar manner to [Dremel](
|
||||||
|
|
||||||
As far as a comparison of systems is concerned, Druid sits in between PowerDrill and Dremel on the spectrum of functionality. It implements almost everything Dremel offers (Dremel handles arbitrary nested data structures while Druid only allows for a single level of array-based nesting) and gets into some of the interesting data layout and compression methods from PowerDrill.
|
As far as a comparison of systems is concerned, Druid sits in between PowerDrill and Dremel on the spectrum of functionality. It implements almost everything Dremel offers (Dremel handles arbitrary nested data structures while Druid only allows for a single level of array-based nesting) and gets into some of the interesting data layout and compression methods from PowerDrill.
|
||||||
|
|
||||||
Druid is a good fit for products that require real-time data ingestion of a single, large data stream. Especially if you are targetting no-downtime operation and are building your product on top of a time-oriented summarization of the incoming data stream. Druid is probably not the right solution if you care more about query flexibility and raw data access than query speed and no-downtime operation. When talking about query speed it is important to clarify what “fast” means, with Druid it is entirely within the realm of possibility (we have done it) to achieve queries that run in single-digit seconds across a 6TB data set.
|
Druid is a good fit for products that require real-time data ingestion of a single, large data stream, especially if you are targeting no-downtime operation and are building your product on top of a time-oriented summarization of the incoming data stream. Druid is probably not the right solution if you care more about query flexibility and raw data access than query speed and no-downtime operation. When talking about query speed it is important to clarify what "fast" means: with Druid it is entirely within the realm of possibility (we have done it) to achieve queries that run in single-digit seconds across a 6TB data set.
|
||||||
|
|
||||||
### Architecture
|
### Architecture
|
||||||
|
|
||||||
Druid is architected as a grouping of systems, each with a distinct role, that together form a working system. The name comes from the Druid class in many role-playing games: it is a shape-shifter, capable of taking many different forms to fulfill various different roles in a group.
|
Druid is architected as a grouping of systems, each with a distinct role, that together form a working system. The name comes from the Druid class in many role-playing games: it is a shape-shifter, capable of taking many different forms to fulfill various different roles in a group.
|
||||||
|
|
||||||
The node types that currently exist are:
|
The node types that currently exist are:
|
||||||
\* **Compute** nodes are the workhorses that handle storage and querying on “historical” data (non-realtime)
|
|
||||||
\* **Realtime** nodes ingest data in real-time, they are in charge of listening to a stream of incoming data and making it available immediately inside the Druid system. As data they have ingested ages, they hand it off to the compute nodes.
|
|
||||||
\* **Master** nodes act as coordinators. They look over the grouping of computes and make sure that data is available, replicated and in a generally “optimal” configuration.
|
|
||||||
\* **Broker** nodes understand the topology of data across all of the other nodes in the cluster and re-write and route queries accordingly
|
|
||||||
\* **Indexer** nodes form a cluster of workers to load batch and real-time data into the system as well as allow for alterations to the data stored in the system (also known as the Indexing Service)
|
|
||||||
|
|
||||||
This separation allows each node to only care about what it is best at. By separating Compute and Realtime, we separate the memory concerns of listening on a real-time stream of data and processing it for entry into the system. By separating the Master and Broker, we separate the needs for querying from the needs for maintaining “good” data distribution across the cluster.
|
* **Compute** nodes are the workhorses that handle storage and querying on "historical" data (non-realtime)
|
||||||
|
* **Realtime** nodes ingest data in real-time; they are in charge of listening to a stream of incoming data and making it available immediately inside the Druid system. As data they have ingested ages, they hand it off to the compute nodes.
|
||||||
|
* **Master** nodes act as coordinators. They look over the grouping of computes and make sure that data is available, replicated and in a generally "optimal" configuration.
|
||||||
|
* **Broker** nodes understand the topology of data across all of the other nodes in the cluster and re-write and route queries accordingly
|
||||||
|
* **Indexer** nodes form a cluster of workers to load batch and real-time data into the system as well as allow for alterations to the data stored in the system (also known as the Indexing Service)
|
||||||
|
|
||||||
|
This separation allows each node to only care about what it is best at. By separating Compute and Realtime, we separate the memory concerns of listening on a real-time stream of data and processing it for entry into the system. By separating the Master and Broker, we separate the needs for querying from the needs for maintaining "good" data distribution across the cluster.
|
||||||
|
|
||||||
All nodes can be run in some highly available fashion. Either as symmetric peers in a share-nothing cluster or as hot-swap failover nodes.
|
All nodes can be run in some highly available fashion. Either as symmetric peers in a share-nothing cluster or as hot-swap failover nodes.
|
||||||
|
|
||||||
|
@ -39,7 +41,7 @@ Aside from these nodes, there are 3 external dependencies to the system:
|
||||||
|
|
||||||
1. A running [ZooKeeper](http://zookeeper.apache.org/) cluster for cluster service discovery and maintenance of current data topology
|
1. A running [ZooKeeper](http://zookeeper.apache.org/) cluster for cluster service discovery and maintenance of current data topology
|
||||||
2. A MySQL instance for maintenance of metadata about the data segments that should be served by the system
|
2. A MySQL instance for maintenance of metadata about the data segments that should be served by the system
|
||||||
3. A “deep storage” LOB store/file system to hold the stored segments
|
3. A "deep storage" LOB store/file system to hold the stored segments
|
||||||
|
|
||||||
### Data Storage
|
### Data Storage
|
||||||
|
|
||||||
|
@ -53,9 +55,9 @@ Getting data into the Druid system requires an indexing process. This gives the
|
||||||
- Bitmap compression
|
- Bitmap compression
|
||||||
- RLE (on the roadmap, but not yet implemented)
|
- RLE (on the roadmap, but not yet implemented)
|
||||||
|
|
||||||
The output of the indexing process is stored in a “deep storage” LOB store/file system ([Deep Storage](Deep Storage.html) for information about potential options). Data is then loaded by compute nodes by first downloading the data to their local disk and then memory mapping it before serving queries.
|
The output of the indexing process is stored in a "deep storage" LOB store/file system (see [Deep Storage](Deep-Storage.html) for information about potential options). Data is then loaded by compute nodes, which first download it to their local disk and then memory map it before serving queries.
|
||||||
|
|
||||||
If a compute node dies, it will no longer serve its segments, but given that the segments are still available on the “deep storage” any other node can simply download the segment and start serving it. This means that it is possible to actually remove all compute nodes from the cluster and then re-provision them without any data loss. It also means that if the “deep storage” is not available, the nodes can continue to serve the segments they have already pulled down (i.e. the cluster goes stale, not down).
|
If a compute node dies, it will no longer serve its segments, but given that the segments are still available on the "deep storage" any other node can simply download the segment and start serving it. This means that it is possible to actually remove all compute nodes from the cluster and then re-provision them without any data loss. It also means that if the "deep storage" is not available, the nodes can continue to serve the segments they have already pulled down (i.e. the cluster goes stale, not down).
|
||||||
|
|
||||||
In order for a segment to exist inside of the cluster, an entry has to be added to a table in a MySQL instance. This entry is a self-describing bit of metadata about the segment; it includes things like the schema of the segment, the size, and the location on deep storage. These entries are what the Master uses to know what data **should** be available on the cluster.
|
In order for a segment to exist inside of the cluster, an entry has to be added to a table in a MySQL instance. This entry is a self-describing bit of metadata about the segment; it includes things like the schema of the segment, the size, and the location on deep storage. These entries are what the Master uses to know what data **should** be available on the cluster.
|
||||||
|
|
||||||
|
@ -65,7 +67,7 @@ In order for a segment to exist inside of the cluster, an entry has to be added
|
||||||
- **Master** Can be run in a hot fail-over configuration. If no masters are running, then changes to the data topology will stop happening (no new data and no data balancing decisions), but the system will continue to run.
|
- **Master** Can be run in a hot fail-over configuration. If no masters are running, then changes to the data topology will stop happening (no new data and no data balancing decisions), but the system will continue to run.
|
||||||
- **Broker** Can be run in parallel or in hot fail-over.
|
- **Broker** Can be run in parallel or in hot fail-over.
|
||||||
- **Realtime** Depending on the semantics of the delivery stream, multiple of these can be run in parallel processing the exact same stream. They periodically checkpoint to disk and eventually push out to the Computes. Steps are taken to be able to recover from process death, but loss of access to the local disk can result in data loss if this is the only method of adding data to the system.
|
- **Realtime** Depending on the semantics of the delivery stream, multiple of these can be run in parallel processing the exact same stream. They periodically checkpoint to disk and eventually push out to the Computes. Steps are taken to be able to recover from process death, but loss of access to the local disk can result in data loss if this is the only method of adding data to the system.
|
||||||
- **“deep storage” file system** If this is not available, new data will not be able to enter the cluster, but the cluster will continue operating as is.
|
- **"deep storage" file system** If this is not available, new data will not be able to enter the cluster, but the cluster will continue operating as is.
|
||||||
- **MySQL** If this is not available, the master will be unable to find out about new segments in the system, but it will continue with its current view of the segments that should exist in the cluster.
|
- **MySQL** If this is not available, the master will be unable to find out about new segments in the system, but it will continue with its current view of the segments that should exist in the cluster.
|
||||||
- **ZooKeeper** If this is not available, data topology changes will not be able to be made, but the Brokers will maintain their most recent view of the data topology and continue serving requests accordingly.
|
- **ZooKeeper** If this is not available, data topology changes will not be able to be made, but the Brokers will maintain their most recent view of the data topology and continue serving requests accordingly.
|
||||||
|
|
||||||
|
@ -77,12 +79,8 @@ For filters at a more granular level than what the Broker can prune based on, th
|
||||||
|
|
||||||
Once it knows the rows that match the current query, it can access the columns it cares about for those rows directly without having to load data that it is just going to throw away.
|
Once it knows the rows that match the current query, it can access the columns it cares about for those rows directly without having to load data that it is just going to throw away.
|
||||||
|
|
||||||
The following diagram shows the data flow for queries without showing batch indexing:
|
|
||||||
|
|
||||||
![Simple Data Flow](https://raw.github.com/metamx/druid/master/doc/data_flow_simple.png "Simple Data Flow")
|
|
||||||
|
|
||||||
### In-memory?
|
### In-memory?
|
||||||
|
|
||||||
Druid is not always and only in-memory. When we first built it, it is true that it was all in-memory all the time, but as time went on the price-performance tradeoff made keeping all of our customers' data in memory all the time a non-starter. We then added the ability to memory map data and allow the OS to handle paging data in and out of memory on demand. Our production cluster is primarily configured to operate with this memory mapping behavior and we are definitely over-subscribed in terms of memory available vs. data a node is serving.
|
Druid is not always and only in-memory. When we first built it, it is true that it was all in-memory all the time, but as time went on the price-performance tradeoff made keeping all of our customers' data in memory all the time a non-starter. We then added the ability to memory map data and allow the OS to handle paging data in and out of memory on demand. Our production cluster is primarily configured to operate with this memory mapping behavior and we are definitely over-subscribed in terms of memory available vs. data a node is serving.
|
||||||
|
|
||||||
As you read some of the old blog posts or other literature about the project, you will see “in-memory” often touted as that is the history of where Druid came from, but the technical reality is that there is a spectrum of price vs. performance and being able to slide along it from all in-memory (high cost, great performance) to mostly on disk (low cost, low performance) is the important knob to be able to adjust.
|
As you read some of the old blog posts or other literature about the project, you will see "in-memory" touted often, as that is the history of where Druid came from. The technical reality, however, is that there is a spectrum of price vs. performance, and being able to slide along it from all in-memory (high cost, great performance) to mostly on disk (low cost, low performance) is the important knob to be able to adjust.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
A version may be declared as a release candidate if it has been deployed to a sizable production cluster. Release candidates are declared as stable after we feel fairly confident there are no major bugs in the version. Check out the [Versioning](Versioning.html) section for how we describe software versions.
|
A version may be declared as a release candidate if it has been deployed to a sizable production cluster. Release candidates are declared as stable after we feel fairly confident there are no major bugs in the version. Check out the [Versioning](Versioning.html) section for how we describe software versions.
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
# Druid Personal Demo Cluster (DPDC)
|
# Druid Personal Demo Cluster (DPDC)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
We are not experts on Cassandra; if anything is incorrect about our portrayal, please let us know on the mailing list or via some other means and we will fix this page.
|
We are not experts on Cassandra; if anything is incorrect about our portrayal, please let us know on the mailing list or via some other means and we will fix this page.
|
||||||
|
|
||||||
Druid is highly optimized for scans and aggregations; it supports arbitrarily deep drill downs into data sets without the need to pre-compute, and it can ingest event streams in real-time and allow users to query events as they come in. Cassandra is a great key-value store and it has some features that allow you to use it to do more interesting things than what you can do with a pure key-value store. But, it is not built for the same use cases that Druid handles, namely regularly scanning over billions of entries per query.
|
Druid is highly optimized for scans and aggregations; it supports arbitrarily deep drill downs into data sets without the need to pre-compute, and it can ingest event streams in real-time and allow users to query events as they come in. Cassandra is a great key-value store and it has some features that allow you to use it to do more interesting things than what you can do with a pure key-value store. But, it is not built for the same use cases that Druid handles, namely regularly scanning over billions of entries per query.
|
||||||
|
|
||||||
Furthermore, Druid is fully read-consistent. Druid breaks down a data set into immutable chunks known as segments. All replicants always present the exact same view for the piece of data they are holding and we don’t have to worry about data synchronization. The tradeoff is that Druid has limited semantics for write and update operations. Cassandra, similar to Amazon’s Dynamo, has an eventually consistent data model. Writes are always supported but updates to data may take some time before all replicas sync up (data reconciliation is done at read time). This model favors availability and scalability over consistency.
|
Furthermore, Druid is fully read-consistent. Druid breaks down a data set into immutable chunks known as segments. All replicants always present the exact same view for the piece of data they are holding and we don’t have to worry about data synchronization. The tradeoff is that Druid has limited semantics for write and update operations. Cassandra, similar to Amazon’s Dynamo, has an eventually consistent data model. Writes are always supported but updates to data may take some time before all replicas sync up (data reconciliation is done at read time). This model favors availability and scalability over consistency.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Druid is a complementary addition to Hadoop. Hadoop is great at storing and making accessible large amounts of individually low-value data. Unfortunately, Hadoop is not great at providing query speed guarantees on top of that data, nor does it have very good operational characteristics for a customer-facing production system. Druid, on the other hand, excels at taking high-value summaries of the low-value data on Hadoop, making it available in a fast and always-on fashion, such that it could be exposed directly to a customer.
|
Druid is a complementary addition to Hadoop. Hadoop is great at storing and making accessible large amounts of individually low-value data. Unfortunately, Hadoop is not great at providing query speed guarantees on top of that data, nor does it have very good operational characteristics for a customer-facing production system. Druid, on the other hand, excels at taking high-value summaries of the low-value data on Hadoop, making it available in a fast and always-on fashion, such that it could be exposed directly to a customer.
|
||||||
|
|
||||||
Druid also requires some infrastructure to exist for “deep storage”. HDFS is one of the implemented options for this “deep storage”.
|
Druid also requires some infrastructure to exist for [deep storage](Deep-Storage.html). HDFS is one of the implemented options for this [deep storage](Deep-Storage.html).
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
The question of Druid versus Impala or Shark basically comes down to your product requirements and what the systems were designed to do.
|
The question of Druid versus Impala or Shark basically comes down to your product requirements and what the systems were designed to do.
|
||||||
|
|
||||||
|
@ -42,4 +42,4 @@ Impala/Shark, being based on data in HDFS or some other backing store, are limit
|
||||||
|
|
||||||
Druid supports timeseries and groupBy style queries. It doesn't have support for joins, which makes it a lot less flexible for generic processing.
|
Druid supports timeseries and groupBy style queries. It doesn't have support for joins, which makes it a lot less flexible for generic processing.
|
||||||
|
|
||||||
Impala/Shark support SQL style queries with full joins.
|
Impala/Shark support SQL style queries with full joins.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
### How does Druid compare to Redshift?
|
### How does Druid compare to Redshift?
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ It’s write semantics aren’t as fluid and does not support joins. ParAccel is
|
||||||
|
|
||||||
### Data distribution model
|
### Data distribution model
|
||||||
|
|
||||||
Druid’s data distribution, is segment based which exists on highly available “deep” storage, like S3 or HDFS. Scaling up (or down) does not require massive copy actions or downtime; in fact, losing any number of compute nodes does not result in data loss because new compute nodes can always be brought up by reading data from “deep” storage.
|
Druid’s data distribution is segment-based: segments exist on highly available "deep" storage, like S3 or HDFS. Scaling up (or down) does not require massive copy actions or downtime; in fact, losing any number of compute nodes does not result in data loss because new compute nodes can always be brought up by reading data from "deep" storage.
|
||||||
|
|
||||||
In contrast, ParAccel’s data distribution model is hash-based. Expanding the cluster requires re-hashing the data across the nodes, making it difficult to perform without taking downtime. Amazon’s Redshift works around this issue with a multi-step process:
|
In contrast, ParAccel’s data distribution model is hash-based. Expanding the cluster requires re-hashing the data across the nodes, making it difficult to perform without taking downtime. Amazon’s Redshift works around this issue with a multi-step process:
|
||||||
|
|
||||||
|
@ -37,4 +37,4 @@ ParAccel’s hash-based distribution generally means that replication is conduct
|
||||||
|
|
||||||
Along with column oriented structures, Druid uses indexing structures to speed up query execution when a filter is provided. Indexing structures do increase storage overhead (and make it more difficult to allow for mutation), but they can also significantly speed up queries.
|
Along with column oriented structures, Druid uses indexing structures to speed up query execution when a filter is provided. Indexing structures do increase storage overhead (and make it more difficult to allow for mutation), but they can also significantly speed up queries.
|
||||||
|
|
||||||
ParAccel does not appear to employ indexing strategies.
|
ParAccel does not appear to employ indexing strategies.
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
How does Druid compare to Vertica?
|
How does Druid compare to Vertica?
|
||||||
|
|
||||||
Vertica is similar to ParAccel/Redshift (described above in [Druid-vs-Redshift](Druid-vs-Redshift.html)) in that it wasn’t built for real-time streaming data ingestion and it supports full SQL.
|
Vertica is similar to ParAccel/Redshift (described above in [Druid-vs-Redshift](Druid-vs-Redshift.html)) in that it wasn’t built for real-time streaming data ingestion and it supports full SQL.
|
||||||
|
|
||||||
The other big difference is that instead of employing indexing, Vertica tries to optimize processing by leveraging run-length encoding (RLE) and other compression techniques along with a “projection” system that creates materialized copies of the data in a different sort order (to maximize the effectiveness of RLE).
|
The other big difference is that instead of employing indexing, Vertica tries to optimize processing by leveraging run-length encoding (RLE) and other compression techniques along with a "projection" system that creates materialized copies of the data in a different sort order (to maximize the effectiveness of RLE).
|
||||||
|
|
||||||
We are unclear about how Vertica handles data distribution and replication, so we cannot speak to if/how Druid is different.
|
We are unclear about how Vertica handles data distribution and replication, so we cannot speak to if/how Druid is different.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Examples
|
Examples
|
||||||
========
|
========
|
||||||
|
@ -15,21 +15,22 @@ There are two options for installing standalone Druid. Building from source, and
|
||||||
|
|
||||||
Clone Druid and build it:
|
Clone Druid and build it:
|
||||||
|
|
||||||
<code>git clone https://github.com/metamx/druid.git druid
|
``` bash
|
||||||
cd druid
|
git clone https://github.com/metamx/druid.git druid
|
||||||
git fetch --tags
|
cd druid
|
||||||
git checkout druid-0.4.30
|
git fetch --tags
|
||||||
./build.sh
|
git checkout druid-0.4.30
|
||||||
</code>
|
./build.sh
|
||||||
|
```
|
||||||
|
|
||||||
### Downloading the DSK (Druid Standalone Kit)
|
### Downloading the DSK (Druid Standalone Kit)
|
||||||
|
|
||||||
[Download](http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz) a stand-alone tarball and run it:
|
[Download](http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz) a stand-alone tarball and run it:
|
||||||
|
|
||||||
<code>
|
``` bash
|
||||||
tar -xzf druid-services-0.X.X-SNAPSHOT-bin.tar.gz
|
tar -xzf druid-services-0.X.X-SNAPSHOT-bin.tar.gz
|
||||||
cd druid-services-0.X.X-SNAPSHOT
|
cd druid-services-0.X.X-SNAPSHOT
|
||||||
</code>
|
```
|
||||||
|
|
||||||
Twitter Example
|
Twitter Example
|
||||||
---------------
|
---------------
|
||||||
|
@ -39,12 +40,12 @@ For a full tutorial based on the twitter example, check out this [Twitter Tutori
|
||||||
This example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/metamx/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
|
This example uses a feature of Twitter that allows for sampling of its stream. We sample the Twitter stream via our [TwitterSpritzerFirehoseFactory](https://github.com/metamx/druid/blob/master/examples/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java) class and use it to simulate the kinds of data you might ingest into Druid. Then, with the client part, the sample shows what kinds of analytics explorations you can do during and after the data is loaded.
|
||||||
|
|
||||||
### What you’ll learn
|
### What you’ll learn
|
||||||
\* See how large amounts of data gets ingested into Druid in real-time
|
* See how large amounts of data get ingested into Druid in real-time
|
||||||
\* Learn how to do fast, interactive, analytics queries on that real-time data
|
* Learn how to do fast, interactive, analytics queries on that real-time data
|
||||||
|
|
||||||
### What you need
|
### What you need
|
||||||
\* A build of standalone Druid with the Twitter example (see above)
|
* A build of standalone Druid with the Twitter example (see above)
|
||||||
\* A Twitter username and password.
|
* A Twitter username and password.
|
||||||
|
|
||||||
### What you’ll do
|
### What you’ll do
|
||||||
|
|
||||||
|
@ -57,12 +58,15 @@ This uses `RandomFirehoseFactory` which emits a stream of random numbers (outCol
|
||||||
|
|
||||||
In a terminal window (NOTE: if you are using the cloned GitHub repository, these scripts are in ./examples/bin), start the server with:
|
In a terminal window (NOTE: if you are using the cloned GitHub repository, these scripts are in ./examples/bin), start the server with:
|
||||||
|
|
||||||
`./run_example_server.sh`
|
``` bash
|
||||||
`# type rand when prompted`
|
./run_example_server.sh # type rand when prompted
|
||||||
|
```
|
||||||
|
|
||||||
In another terminal window:
|
In another terminal window:
|
||||||
|
|
||||||
`./run_example_client.sh`
|
``` bash
|
||||||
`# type rand when prompted`
|
./run_example_client.sh # type rand when prompted
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
The result of the client query is in JSON format. The client makes a REST request using the program `curl`, which is usually installed on Linux, Unix, and OS X by default.
|
The result of the client query is in JSON format. The client makes a REST request using the program `curl`, which is usually installed on Linux, Unix, and OS X by default.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
A filter is a JSON object indicating which rows of data should be included in the computation for a query. It’s essentially the equivalent of the WHERE clause in SQL. Druid supports the following types of filters.
|
A filter is a JSON object indicating which rows of data should be included in the computation for a query. It’s essentially the equivalent of the WHERE clause in SQL. Druid supports the following types of filters.
|
||||||
|
|
||||||
|
@ -9,12 +9,9 @@ The simplest filter is a selector filter. The selector filter will match a speci
|
||||||
|
|
||||||
The grammar for a SELECTOR filter is as follows:
|
The grammar for a SELECTOR filter is as follows:
|
||||||
|
|
||||||
<code>"filter": {
|
``` json
|
||||||
"type": "selector",
|
"filter": { "type": "selector", "dimension": <dimension_string>, "value": <dimension_value_string> }
|
||||||
"dimension": <dimension_string>,
|
```
|
||||||
"value": <dimension_value_string>
|
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
||||||
This is the equivalent of `WHERE <dimension_string> = '<dimension_value_string>'`.
|
This is the equivalent of `WHERE <dimension_string> = '<dimension_value_string>'`.
|
||||||
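For example, a concrete selector filter (using a hypothetical `country` dimension and value) might look like:

```json
"filter": { "type": "selector", "dimension": "country", "value": "US" }
```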
|
|
||||||
|
@ -22,12 +19,9 @@ This is the equivalent of `WHERE <dimension_string> = '<dimension_value_string>'
|
||||||
|
|
||||||
The regular expression filter is similar to the selector filter, but using regular expressions. It matches the specified dimension with the given pattern. The pattern can be any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html).
|
The regular expression filter is similar to the selector filter, but using regular expressions. It matches the specified dimension with the given pattern. The pattern can be any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html).
|
||||||
|
|
||||||
<code>"filter": {
|
``` json
|
||||||
"type": "regex",
|
"filter": { "type": "regex", "dimension": <dimension_string>, "pattern": <pattern_string> }
|
||||||
"dimension": <dimension_string>,
|
```
|
||||||
"pattern": <pattern_string>
|
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
||||||
### Logical expression filters
|
### Logical expression filters
|
||||||
|
|
||||||
|
@ -35,11 +29,9 @@ The regular expression filter is similar to the selector filter, but using regul
|
||||||
|
|
||||||
The grammar for an AND filter is as follows:
|
The grammar for an AND filter is as follows:
|
||||||
|
|
||||||
<code>"filter": {
|
``` json
|
||||||
"type": "and",
|
"filter": { "type": "and", "fields": [<filter>, <filter>, ...] }
|
||||||
"fields": [<filter>, <filter>, ...]
|
```
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
||||||
The filters in fields can be any other filter defined on this page.
|
The filters in fields can be any other filter defined on this page.
|
||||||
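To illustrate nesting, the sketch below combines a selector filter and a regular expression filter under an AND filter; the dimension names, value, and pattern are hypothetical:

```json
"filter": {
  "type": "and",
  "fields": [
    { "type": "selector", "dimension": "country", "value": "US" },
    { "type": "regex", "dimension": "page", "pattern": "^/blog/.*" }
  ]
}
```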
|
|
||||||
|
@ -47,11 +39,9 @@ The filters in fields can be any other filter defined on this page.
|
||||||
|
|
||||||
The grammar for an OR filter is as follows:
|
The grammar for an OR filter is as follows:
|
||||||
|
|
||||||
<code>"filter": {
|
``` json
|
||||||
"type": "or",
|
"filter": { "type": "or", "fields": [<filter>, <filter>, ...] }
|
||||||
"fields": [<filter>, <filter>, ...]
|
```
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
||||||
The filters in fields can be any other filter defined on this page.
|
The filters in fields can be any other filter defined on this page.
|
||||||
|
|
||||||
|
@ -59,11 +49,9 @@ The filters in fields can be any other filter defined on this page.
|
||||||
|
|
||||||
The grammar for a NOT filter is as follows:
|
The grammar for a NOT filter is as follows:
|
||||||
|
|
||||||
<code>"filter": {
|
```json
|
||||||
"type": "not",
|
"filter": { "type": "not", "field": <filter> }
|
||||||
"field": <filter>
|
```
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
||||||
The filter specified at field can be any other filter defined on this page.
|
The filter specified at field can be any other filter defined on this page.
|
||||||
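For instance, excluding a single dimension value (again with hypothetical names) is a NOT filter wrapped around a selector:

```json
"filter": { "type": "not", "field": { "type": "selector", "dimension": "country", "value": "US" } }
```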
|
|
||||||
|
@ -73,19 +61,21 @@ The JavaScript filter matches a dimension against the specified JavaScript funct
|
||||||
|
|
||||||
The function takes a single argument, the dimension value, and returns either true or false.
|
The function takes a single argument, the dimension value, and returns either true or false.
|
||||||
|
|
||||||
<code>{
|
```json
|
||||||
"type" : "javascript",
|
{
|
||||||
"dimension" : <dimension_string>,
|
"type" : "javascript",
|
||||||
"function" : "function(value) { <...> }"
|
"dimension" : <dimension_string>,
|
||||||
}
|
"function" : "function(value) { <...> }"
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`
|
The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`
|
||||||
|
|
||||||
<code>{
|
```json
|
||||||
"type" : "javascript",
|
{
|
||||||
"dimension" : "name",
|
"type" : "javascript",
|
||||||
"function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
|
"dimension" : "name",
|
||||||
}
|
"function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
|
||||||
</code>
|
}
|
||||||
|
```
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Firehoses describe the data stream source. They are pluggable and thus the configuration schema can and will vary based on the `type` of the firehose.
|
Firehoses describe the data stream source. They are pluggable and thus the configuration schema can and will vary based on the `type` of the firehose.
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ We describe the configuration of the Kafka firehose from the example below, but
|
||||||
|
|
||||||
- `consumerProps` is a map of properties for the Kafka consumer. The JSON object is converted into a Properties object and passed along to the Kafka consumer.
|
- `consumerProps` is a map of properties for the Kafka consumer. The JSON object is converted into a Properties object and passed along to the Kafka consumer.
|
||||||
- `feed` is the feed that the Kafka consumer should read from.
|
- `feed` is the feed that the Kafka consumer should read from.
|
||||||
- `parser` represents a parser that knows how to convert from String representations into the required `InputRow` representation that Druid uses. This is a potentially reusable piece that can be found in many of the firehoses that are based on text streams. The spec in the example describes a JSON feed (new-line delimited objects), with a timestamp column called “timestamp” in ISO8601 format and that it should not include the dimension “value” when processing. More information about the options available for the parser are available [here](https://github.com/metamx/druid/wiki/Firehose#parsing-data).
|
- `parser` represents a parser that knows how to convert from String representations into the required `InputRow` representation that Druid uses. This is a potentially reusable piece that can be found in many of the firehoses that are based on text streams. The spec in the example describes a JSON feed (new-line delimited objects), with a timestamp column called "timestamp" in ISO8601 format and that it should not include the dimension "value" when processing. More information about the options available for the parser are available [here](https://github.com/metamx/druid/wiki/Firehose#parsing-data).
|
||||||
|
|
||||||
Available Firehoses
|
Available Firehoses
|
||||||
-------------------
|
-------------------
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
The granularity field determines how data gets bucketed across the time dimension, i.e., how it gets aggregated by hour, day, minute, etc.
|
The granularity field determines how data gets bucketed across the time dimension, i.e., how it gets aggregated by hour, day, minute, etc.
|
||||||
|
|
||||||
|
@ -10,8 +10,9 @@ It can be specified either as a string for simple granularities or as an object
|
||||||
Simple granularities are specified as a string and bucket timestamps by their UTC time (i.e. days start at 00:00 UTC).
|
Simple granularities are specified as a string and bucket timestamps by their UTC time (i.e. days start at 00:00 UTC).
|
||||||
|
|
||||||
Supported granularity strings are: `all`, `none`, `minute`, `fifteen_minute`, `thirty_minute`, `hour` and `day`
|
Supported granularity strings are: `all`, `none`, `minute`, `fifteen_minute`, `thirty_minute`, `hour` and `day`
|
||||||
\* **`all`** buckets everything into a single bucket
|
|
||||||
\* **`none`** does not bucket data (it actually uses the granularity of the index - minimum here is `none` which means millisecond granularity). Using `none` in a [timeseries query|TimeSeriesQuery](timeseries query|TimeSeriesQuery.html) is currently not recommended (the system will try to generate 0 values for all milliseconds that didn’t exist, which is often a lot).
|
* `all` buckets everything into a single bucket
|
||||||
|
* `none` does not bucket data (it actually uses the granularity of the index - minimum here is `none` which means millisecond granularity). Using `none` in a [TimeSeriesQuery](TimeSeriesQuery.html) is currently not recommended (the system will try to generate 0 values for all milliseconds that didn’t exist, which is often a lot).
|
||||||
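For comparison with the object-based granularities below, a simple granularity appears in a query as just the string itself, e.g. (the surrounding query is omitted):

```
"granularity": "day"
```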
|
|
||||||
### Duration Granularities
|
### Duration Granularities
|
||||||
|
|
||||||
|
@ -19,11 +20,15 @@ Duration granularities are specified as an exact duration in milliseconds and ti
|
||||||
|
|
||||||
They also support specifying an optional origin, which defines where to start counting time buckets from (defaults to 1970-01-01T00:00:00Z).
|
They also support specifying an optional origin, which defines where to start counting time buckets from (defaults to 1970-01-01T00:00:00Z).
|
||||||
|
|
||||||
<code>{"type": "duration", "duration": "7200000"}</code>
|
```
|
||||||
|
{"type": "duration", "duration": "7200000"}
|
||||||
|
```
|
||||||
|
|
||||||
This chunks up every 2 hours.
|
This chunks up every 2 hours.
|
||||||
|
|
||||||
<code>{"type": "duration", "duration": "3600000", "origin": "2012-01-01T00:30:00Z"}</code>
|
```
|
||||||
|
{"type": "duration", "duration": "3600000", "origin": "2012-01-01T00:30:00Z"}
|
||||||
|
```
|
||||||
|
|
||||||
This chunks up every hour on the half-hour.
|
This chunks up every hour on the half-hour.
|
||||||
|
|
||||||
|
@ -38,11 +43,15 @@ By default years start on the first of January, months start on the first of the
|
||||||
Time zone is optional (defaults to UTC)
|
Time zone is optional (defaults to UTC)
|
||||||
Origin is optional (defaults to 1970-01-01T00:00:00 in the given time zone)
|
Origin is optional (defaults to 1970-01-01T00:00:00 in the given time zone)
|
||||||
|
|
||||||
<code>{"type": "period", "period": "P2D", "timeZone": "America/Los_Angeles"}</code>
|
```
|
||||||
|
{"type": "period", "period": "P2D", "timeZone": "America/Los_Angeles"}
|
||||||
|
```
|
||||||
|
|
||||||
This will bucket by two day chunks in the Pacific timezone.
|
This will bucket by two day chunks in the Pacific timezone.
|
||||||
|
|
||||||
<code>{"type": "period", "period": "P3M", "timeZone": "America/Los_Angeles", "origin": "2012-02-01T00:00:00-08:00"}</code>
|
```
|
||||||
|
{"type": "period", "period": "P3M", "timeZone": "America/Los_Angeles", "origin": "2012-02-01T00:00:00-08:00"}
|
||||||
|
```
|
||||||
|
|
||||||
This will bucket by 3 month chunks in the Pacific timezone where the three-month quarters are defined as starting from February.
|
This will bucket by 3 month chunks in the Pacific timezone where the three-month quarters are defined as starting from February.
|
||||||
|
|
||||||
|
|
|
@ -1,96 +1,53 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
These types of queries take a groupBy query object and return an array of JSON objects where each object represents a grouping asked for by the query.
|
These types of queries take a groupBy query object and return an array of JSON objects where each object represents a grouping asked for by the query.
|
||||||
|
|
||||||
An example groupBy query object is shown below:
|
An example groupBy query object is shown below:
|
||||||
|
|
||||||
<pre>
|
``` json
|
||||||
<code>
|
|
||||||
{
|
{
|
||||||
[queryType]() “groupBy”,
|
"queryType": "groupBy",
|
||||||
[dataSource]() “sample\_datasource”,
|
"dataSource": "sample_datasource",
|
||||||
[granularity]() “day”,
|
"granularity": "day",
|
||||||
[dimensions]() [“dim1”, “dim2”],
|
"dimensions": ["dim1", "dim2"],
|
||||||
[limitSpec]() {
|
"limitSpec": { "type": "default", "limit": 5000, "columns": ["dim1", "metric1"] },
|
||||||
[type]() “default”,
|
"filter": {
|
||||||
[limit]() 5000,
|
"type": "and",
|
||||||
[columns]() [“dim1”, “metric1”]
|
"fields": [
|
||||||
},
|
{ "type": "selector", "dimension": "sample_dimension1", "value": "sample_value1" },
|
||||||
[filter]() {
|
{ "type": "or",
|
||||||
[type]() “and”,
|
"fields": [
|
||||||
[fields]() [
|
{ "type": "selector", "dimension": "sample_dimension2", "value": "sample_value2" },
|
||||||
{
|
{ "type": "selector", "dimension": "sample_dimension3", "value": "sample_value3" }
|
||||||
[type]() “selector”,
|
]
|
||||||
[dimension]() “sample\_dimension1”,
|
}
|
||||||
[value]() “sample\_value1”
|
]
|
||||||
},
|
},
|
||||||
{
|
"aggregations": [
|
||||||
[type]() “or”,
|
{ "type": "longSum", "name": "sample_name1", "fieldName": "sample_fieldName1" },
|
||||||
[fields]() [
|
{ "type": "doubleSum", "name": "sample_name2", "fieldName": "sample_fieldName2" }
|
||||||
{
|
],
|
||||||
[type]() “selector”,
|
"postAggregations": [
|
||||||
[dimension]() “sample\_dimension2”,
|
{ "type": "arithmetic",
|
||||||
[value]() “sample\_value2”
|
"name": "sample_divide",
|
||||||
},
|
"fn": "/",
|
||||||
{
|
"fields": [
|
||||||
[type]() “selector”,
|
{ "type": "fieldAccess", "name": "sample_name1", "fieldName": "sample_fieldName1" },
|
||||||
[dimension]() “sample\_dimension3”,
|
{ "type": "fieldAccess", "name": "sample_name2", "fieldName": "sample_fieldName2" }
|
||||||
[value]() “sample\_value3”
|
]
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
}
|
"intervals": [ "2012-01-01T00:00:00.000/2012-01-03T00:00:00.000" ],
|
||||||
]
|
"having": { "type": "greaterThan", "aggregation": "sample_name1", "value": 0 }
|
||||||
},
|
|
||||||
[aggregations]() [
|
|
||||||
{
|
|
||||||
[type]() “longSum”,
|
|
||||||
[name]() “sample\_name1”,
|
|
||||||
[fieldName]() “sample\_fieldName1”
|
|
||||||
},
|
|
||||||
{
|
|
||||||
[type]() “doubleSum”,
|
|
||||||
[name]() “sample\_name2”,
|
|
||||||
[fieldName]() “sample\_fieldName2”
|
|
||||||
}
|
|
||||||
],
|
|
||||||
[postAggregations]() [
|
|
||||||
{
|
|
||||||
[type]() “arithmetic”,
|
|
||||||
[name]() “sample\_divide”,
|
|
||||||
[fn]() “/”,
|
|
||||||
[fields]() [
|
|
||||||
{
|
|
||||||
[type]() “fieldAccess”,
|
|
||||||
[name]() “sample\_name1”,
|
|
||||||
[fieldName]() “sample\_fieldName1”
|
|
||||||
},
|
|
||||||
{
|
|
||||||
[type]() “fieldAccess”,
|
|
||||||
[name]() “sample\_name2”,
|
|
||||||
[fieldName]() “sample\_fieldName2”
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
[intervals]() [
|
|
||||||
“2012-01-01T00:00:00.000/2012-01-03T00:00:00.000”
|
|
||||||
],
|
|
||||||
[having]() {
|
|
||||||
[type]() “greaterThan”,
|
|
||||||
[aggregation]() “sample\_name1”,
|
|
||||||
[value]() 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
```
|
||||||
</pre>
|
|
||||||
</code>
|
|
||||||
|
|
||||||
There are 9 main parts to a groupBy query:
|
There are 9 main parts to a groupBy query:
|
||||||
|
|
||||||
|property|description|required?|
|
|property|description|required?|
|
||||||
|--------|-----------|---------|
|
|--------|-----------|---------|
|
||||||
|queryType|This String should always be “groupBy”; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|
|queryType|This String should always be "groupBy"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|
||||||
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|
||||||
|dimensions|A JSON list of dimensions to do the groupBy over|yes|
|
|dimensions|A JSON list of dimensions to do the groupBy over|yes|
|
||||||
|orderBy|See [OrderBy](OrderBy.html).|no|
|
|orderBy|See [OrderBy](OrderBy.html).|no|
|
||||||
|
@ -102,33 +59,32 @@ There are 9 main parts to a groupBy query:
|
||||||
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|
||||||
|context|An additional JSON Object which can be used to specify certain flags.|no|
|
|context|An additional JSON Object which can be used to specify certain flags.|no|
|
||||||
|
|
||||||
To pull it all together, the above query would return *n\*m* data points, up to a maximum of 5000 points, where n is the cardinality of the “dim1” dimension, m is the cardinality of the “dim2” dimension, each day between 2012-01-01 and 2012-01-03, from the “sample\_datasource” table. Each data point contains the (long) sum of sample\_fieldName1 if the value of the data point is greater than 0, the (double) sum of sample\_fieldName2 and the (double) the result of sample\_fieldName1 divided by sample\_fieldName2 for the filter set for a particular grouping of “dim1” and “dim2”. The output looks like this:
|
To pull it all together, the above query would return *n\*m* data points, up to a maximum of 5000 points, where n is the cardinality of the "dim1" dimension, m is the cardinality of the "dim2" dimension, each day between 2012-01-01 and 2012-01-03, from the "sample_datasource" table. Each data point contains the (long) sum of sample_fieldName1 if the value of the data point is greater than 0, the (double) sum of sample_fieldName2 and the (double) result of sample_fieldName1 divided by sample_fieldName2 for the filter set for a particular grouping of "dim1" and "dim2". The output looks like this:
|
||||||
|
|
||||||
<pre>
|
```json
|
||||||
<code>
|
[
|
||||||
[ {
|
{
|
||||||
“version” : “v1”,
|
"version" : "v1",
|
||||||
“timestamp” : “2012-01-01T00:00:00.000Z”,
|
"timestamp" : "2012-01-01T00:00:00.000Z",
|
||||||
“event” : {
|
"event" : {
|
||||||
“dim1” : <some_dim1_value>,
|
"dim1" : <some_dim_value_one>,
|
||||||
“dim2” : <some_dim2_value>,
|
"dim2" : <some_dim_value_two>,
|
||||||
“sample\_name1” : <some_sample_name1_value>,
|
"sample_name1" : <some_sample_name_value_one>,
|
||||||
“sample\_name2” :<some_sample_name2_value>,
|
"sample_name2" :<some_sample_name_value_two>,
|
||||||
“sample\_divide” : <some_sample_divide_value>
|
"sample_divide" : <some_sample_divide_value>
|
||||||
}
|
}
|
||||||
}, {
|
},
|
||||||
“version” : “v1”,
|
{
|
||||||
“timestamp” : “2012-01-01T00:00:00.000Z”,
|
"version" : "v1",
|
||||||
“event” : {
|
"timestamp" : "2012-01-01T00:00:00.000Z",
|
||||||
“dim1” : <some_other_dim1_value>,
|
"event" : {
|
||||||
“dim2” : <some_other_dim2_value>,
|
"dim1" : <some_other_dim_value_one>,
|
||||||
“sample\_name1” : <some_other_sample_name1_value>,
|
"dim2" : <some_other_dim_value_two>,
|
||||||
“sample\_name2” :<some_other_sample_name2_value>,
|
"sample_name1" : <some_other_sample_name_value_one>,
|
||||||
“sample\_divide” : <some_other_sample_divide_value>
|
"sample_name2" :<some_other_sample_name_value_two>,
|
||||||
}
|
"sample_divide" : <some_other_sample_divide_value>
|
||||||
},
|
}
|
||||||
…
|
},
|
||||||
|
...
|
||||||
]
|
]
|
||||||
|
```
|
||||||
</pre>
|
|
||||||
</code>
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
A having clause is a JSON object identifying which rows from a groupBy query should be returned, by specifying conditions on aggregated values.
|
A having clause is a JSON object identifying which rows from a groupBy query should be returned, by specifying conditions on aggregated values.
|
||||||
|
|
||||||
|
@ -17,12 +17,13 @@ Numeric filters can be used as the base filters for more complex boolean express
|
||||||
The equalTo filter will match rows with a specific aggregate value.
|
The equalTo filter will match rows with a specific aggregate value.
|
||||||
The grammar for an `equalTo` filter is as follows:
|
The grammar for an `equalTo` filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "equalTo",
|
{
|
||||||
"aggregation": <aggregate_metric>,
|
"type": "equalTo",
|
||||||
"value": <numeric_value>
|
"aggregation": <aggregate_metric>,
|
||||||
}
|
"value": <numeric_value>
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
This is the equivalent of `HAVING <aggregate> = <value>`.
|
This is the equivalent of `HAVING <aggregate> = <value>`.
|
||||||
|
|
||||||
|
@ -31,12 +32,13 @@ This is the equivalent of `HAVING <aggregate> = <value>`.
|
||||||
The greaterThan filter will match rows with aggregate values greater than the given value.
|
The greaterThan filter will match rows with aggregate values greater than the given value.
|
||||||
The grammar for a `greaterThan` filter is as follows:
|
The grammar for a `greaterThan` filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "greaterThan",
|
{
|
||||||
"aggregation": <aggregate_metric>,
|
"type": "greaterThan",
|
||||||
"value": <numeric_value>
|
"aggregation": <aggregate_metric>,
|
||||||
}
|
"value": <numeric_value>
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
This is the equivalent of `HAVING <aggregate> > <value>`.
|
This is the equivalent of `HAVING <aggregate> > <value>`.
|
||||||
|
|
||||||
|
@ -45,12 +47,13 @@ This is the equivalent of `HAVING <aggregate> > <value>`.
|
||||||
The lessThan filter will match rows with aggregate values less than the specified value.
|
The lessThan filter will match rows with aggregate values less than the specified value.
|
||||||
The grammar for a `lessThan` filter is as follows:
|
The grammar for a `lessThan` filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "lessThan",
|
{
|
||||||
"aggregation": <aggregate_metric>,
|
"type": "lessThan",
|
||||||
"value": <numeric_value>
|
"aggregation": <aggregate_metric>,
|
||||||
}
|
"value": <numeric_value>
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
This is the equivalent of `HAVING <aggregate> < <value>`.
|
This is the equivalent of `HAVING <aggregate> < <value>`.
|
||||||
|
|
||||||
|
@ -60,11 +63,12 @@ This is the equivalent of `HAVING <aggregate> < <value>`.
|
||||||
|
|
||||||
The grammar for an AND filter is as follows:
|
The grammar for an AND filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "and",
|
{
|
||||||
"havingSpecs": [<having clause>, <having clause>, ...]
|
"type": "and",
|
||||||
}
|
"havingSpecs": [<having clause>, <having clause>, ...]
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
The having clauses in `havingSpecs` can be any other having clause defined on this page.
|
The having clauses in `havingSpecs` can be any other having clause defined on this page.
|
||||||
|
|
||||||
|
@ -72,11 +76,12 @@ The having clauses in `havingSpecs` can be any other having clause defined on th
|
||||||
|
|
||||||
The grammar for an OR filter is as follows:
|
The grammar for an OR filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "or",
|
{
|
||||||
"havingSpecs": [<having clause>, <having clause>, ...]
|
"type": "or",
|
||||||
}
|
"havingSpecs": [<having clause>, <having clause>, ...]
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
The having clauses in `havingSpecs` can be any other having clause defined on this page.
|
The having clauses in `havingSpecs` can be any other having clause defined on this page.
|
||||||
|
|
||||||
|
@ -84,10 +89,11 @@ The having clauses in `havingSpecs` can be any other having clause defined on th
|
||||||
|
|
||||||
The grammar for a NOT filter is as follows:
|
The grammar for a NOT filter is as follows:
|
||||||
|
|
||||||
<code>"having": {
|
```json
|
||||||
"type": "not",
|
{
|
||||||
"havingSpec": <having clause>
|
"type": "not",
|
||||||
}
|
"havingSpec": <having clause>
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
The having clause specified at `havingSpec` can be any other having clause defined on this page.
|
The having clause specified at `havingSpec` can be any other having clause defined on this page.
|
||||||
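Since `and`, `or`, and `not` accept any other having clause, these specs can be nested. As a sketch (the aggregator names are hypothetical), the following keeps rows whose `num_total` exceeds 100 while excluding rows whose `num_errors` equals 0:

```json
{
    "type": "and",
    "havingSpecs": [
        { "type": "greaterThan", "aggregation": "num_total", "value": 100 },
        {
            "type": "not",
            "havingSpec": { "type": "equalTo", "aggregation": "num_errors", "value": 0 }
        }
    ]
}
```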
|
|
|
@ -1,10 +1,11 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
|
|
||||||
Druid is an open-source analytics datastore designed for realtime, exploratory queries on large-scale data sets (hundreds of billions of entries, hundreds of TBs of data). Druid provides cost-effective, always-on, realtime data ingestion and arbitrary data exploration.
||||||
|
|
||||||
- Check out some [Examples](Examples.html)
|
- Check out some [Examples](Examples.html)
|
||||||
- Try out Druid with our Getting Started [Tutorial](https://github.com/metamx/druid/wiki/Tutorial%3A-A-First-Look-at-Druid)
|
- Try out Druid with our Getting Started [Tutorial](./Tutorial%3A-A-First-Look-at-Druid.html)
|
||||||
- Learn more by reading the [White Paper](http://static.druid.io/docs/druid.pdf)
|
- Learn more by reading the [White Paper](http://static.druid.io/docs/druid.pdf)
|
||||||
|
|
||||||
Why Druid?
|
Why Druid?
|
||||||
|
@ -24,25 +25,25 @@ We have more details about the general design of the system and why you might wa
|
||||||
The data store world is vast, confusing and constantly in flux. This page is meant to help potential evaluators decide whether Druid is a good fit for the problem one needs to solve. If anything about it is incorrect, please provide that feedback on the mailing list or via some other means, and we will fix this page.
||||||
|
|
||||||
#### When Druid?
|
#### When Druid?
|
||||||
* You need to do interactive, fast exploration of large amounts of data
||||||
\* You need analytics (not key value store)
|
* You need analytics (not key value store)
|
||||||
\* You have a lot of data (10s of Billions of events added per day, 10s of TB of data added per day)
|
* You have a lot of data (10s of Billions of events added per day, 10s of TB of data added per day)
|
||||||
\* You want to do your analysis on data as it’s happening (realtime)
|
* You want to do your analysis on data as it’s happening (realtime)
|
||||||
\* Your store needs to be always-on, 24x7x365 and years into the future.
|
* Your store needs to be always-on, 24x7x365 and years into the future.
|
||||||
|
|
||||||
#### Not Druid?
|
#### Not Druid?
|
||||||
* The amount of data you have can easily be handled by MySQL
||||||
* You're querying for individual entries or doing lookups (not analytics)
||||||
\* Batch is good enough
|
* Batch is good enough
|
||||||
* Canned queries are good enough
||||||
\* Downtime is no big deal
|
* Downtime is no big deal
|
||||||
|
|
||||||
#### Druid vs…
|
#### Druid vs…
|
||||||
\* [Druid-vs-Impala-or-Shark](Druid-vs-Impala-or-Shark.html)
|
* [Druid-vs-Impala-or-Shark](Druid-vs-Impala-or-Shark.html)
|
||||||
\* [Druid-vs-Redshift](Druid-vs-Redshift.html)
|
* [Druid-vs-Redshift](Druid-vs-Redshift.html)
|
||||||
\* [Druid-vs-Vertica](Druid-vs-Vertica.html)
|
* [Druid-vs-Vertica](Druid-vs-Vertica.html)
|
||||||
\* [Druid-vs-Cassandra](Druid-vs-Cassandra.html)
|
* [Druid-vs-Cassandra](Druid-vs-Cassandra.html)
|
||||||
\* [Druid-vs-Hadoop](Druid-vs-Hadoop.html)
|
* [Druid-vs-Hadoop](Druid-vs-Hadoop.html)
|
||||||
|
|
||||||
Key Features
|
Key Features
|
||||||
------------
|
------------
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Disclaimer: We are still in the process of finalizing the indexing service and these configs are prone to change at any time. We will announce when we feel the indexing service and the configurations described are stable.
|
Disclaimer: We are still in the process of finalizing the indexing service and these configs are prone to change at any time. We will announce when we feel the indexing service and the configurations described are stable.
|
||||||
|
|
||||||
|
@ -21,27 +21,37 @@ The indexer coordinator node exposes HTTP endpoints where tasks can be submitted
|
||||||
|
|
||||||
Tasks can be submitted via POST requests to:
|
Tasks can be submitted via POST requests to:
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task
|
||||||
|
```
|
||||||
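For example, assuming the task definition is stored in a local file named `task.json` (the file name and host are placeholders, not part of the API):

```bash
curl -X POST "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task" \
  -H 'content-type: application/json' -d @task.json
```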
|
|
||||||
Tasks can be cancelled via POST requests to:
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/shutdown
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/shutdown
|
||||||
|
```
|
||||||
|
|
||||||
Issuing the cancel request once sends a graceful shutdown request. Graceful shutdowns may not stop a task right away, but instead issue a safe stop command at a point deemed least impactful to the system. Issuing the cancel request twice in succession will `kill -9` the task.
||||||
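For example, a graceful shutdown request for a given task id might look like this sketch:

```bash
curl -X POST "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/shutdown"
```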
|
|
||||||
Task statuses can be retrieved via GET requests to:
|
Task statuses can be retrieved via GET requests to:
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/status
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/status
|
||||||
|
```
|
||||||
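For example (a sketch using the same placeholders):

```bash
curl "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/status"
```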
|
|
||||||
Task segments can be retrieved via GET requests to:
|
Task segments can be retrieved via GET requests to:
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/segments
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/segments
|
||||||
|
```
|
||||||
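For example:

```bash
curl "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/task/{taskId}/segments"
```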
|
|
||||||
When a task is submitted, the coordinator creates a lock over the data source and interval of the task. The coordinator also stores the task in a MySQL database table. The database table is read at startup time to bootstrap any tasks that may have been submitted to the coordinator but may not yet have been executed.
|
When a task is submitted, the coordinator creates a lock over the data source and interval of the task. The coordinator also stores the task in a MySQL database table. The database table is read at startup time to bootstrap any tasks that may have been submitted to the coordinator but may not yet have been executed.
|
||||||
|
|
||||||
The coordinator also exposes a simple UI to show what tasks are currently running on what nodes at
|
The coordinator also exposes a simple UI to show what tasks are currently running on what nodes at
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/static/console.html
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/static/console.html
|
||||||
|
```
|
||||||
|
|
||||||
#### Task Execution
|
#### Task Execution
|
||||||
|
|
||||||
|
@ -55,31 +65,34 @@ The Autoscaling mechanisms currently in place are tightly coupled with our deplo
|
||||||
|
|
||||||
The Coordinator node controls the number of workers in the cluster according to a worker setup spec that is submitted via a POST request to the indexer at:
|
The Coordinator node controls the number of workers in the cluster according to a worker setup spec that is submitted via a POST request to the indexer at:
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
|
```
|
||||||
|
http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup
|
||||||
|
```
|
||||||
|
|
||||||
A sample worker setup spec is shown below:
|
A sample worker setup spec is shown below:
|
||||||
|
|
||||||
<code>{
|
```
|
||||||
"minVersion":"some_version",
|
{
|
||||||
"minNumWorkers":"0",
|
"minVersion":"some_version",
|
||||||
"maxNumWorkers":"10",
|
"minNumWorkers":"0",
|
||||||
"nodeData": {
|
"maxNumWorkers":"10",
|
||||||
"type":"ec2",
|
"nodeData": {
|
||||||
"amiId":"ami-someId",
|
"type":"ec2",
|
||||||
"instanceType":"m1.xlarge",
|
"amiId":"ami-someId",
|
||||||
"minInstances":"1",
|
"instanceType":"m1.xlarge",
|
||||||
"maxInstances":"1",
|
"minInstances":"1",
|
||||||
"securityGroupIds":["securityGroupIds"],
|
"maxInstances":"1",
|
||||||
"keyName":"keyName"
|
"securityGroupIds":["securityGroupIds"],
|
||||||
},
|
"keyName":"keyName"
|
||||||
"userData":{
|
},
|
||||||
"classType":"galaxy",
|
"userData":{
|
||||||
"env":"druid",
|
"classType":"galaxy",
|
||||||
"version":"druid_version",
|
"env":"druid",
|
||||||
"type":"sample_cluster/worker"
|
"version":"druid_version",
|
||||||
}
|
"type":"sample_cluster/worker"
|
||||||
}
|
}
|
||||||
</code>
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Issuing a GET request at the same URL will return the worker setup spec that is currently in place. The worker setup spec list above is just a sample and it is possible to write worker setup specs for other deployment environments. A description of the worker setup spec is shown below.
||||||
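As a sketch, assuming the spec above is saved in a local file named `worker_setup.json` (the file name is a placeholder):

```bash
# Overwrite the worker setup spec
curl -X POST "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup" \
  -H 'content-type: application/json' -d @worker_setup.json

# Retrieve the worker setup spec currently in place
curl "http://<COORDINATOR_IP>:<port>/druid/indexer/v1/worker/setup"
```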
|
|
||||||
|
@ -101,19 +114,21 @@ Indexer Coordinator nodes can be run using the `com.metamx.druid.indexing.coordi
|
||||||
|
|
||||||
Indexer Coordinator nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
|
Indexer Coordinator nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
|
||||||
|
|
||||||
-Ddruid.zk.paths.indexer.announcementsPath=/druid/indexer/announcements
|
```
|
||||||
-Ddruid.zk.paths.indexer.leaderLatchPath=/druid/indexer/leaderLatchPath
|
-Ddruid.zk.paths.indexer.announcementsPath=/druid/indexer/announcements
|
||||||
-Ddruid.zk.paths.indexer.statusPath=/druid/indexer/status
|
-Ddruid.zk.paths.indexer.leaderLatchPath=/druid/indexer/leaderLatchPath
|
||||||
-Ddruid.zk.paths.indexer.tasksPath=/druid/demo/indexer/tasks
|
-Ddruid.zk.paths.indexer.statusPath=/druid/indexer/status
|
||||||
|
-Ddruid.zk.paths.indexer.tasksPath=/druid/demo/indexer/tasks
|
||||||
|
|
||||||
-Ddruid.indexer.runner=remote
|
-Ddruid.indexer.runner=remote
|
||||||
-Ddruid.indexer.taskDir=/mnt/persistent/task/
|
-Ddruid.indexer.taskDir=/mnt/persistent/task/
|
||||||
-Ddruid.indexer.configTable=sample_config
|
-Ddruid.indexer.configTable=sample_config
|
||||||
-Ddruid.indexer.workerSetupConfigName=worker_setup
|
-Ddruid.indexer.workerSetupConfigName=worker_setup
|
||||||
-Ddruid.indexer.strategy=ec2
|
-Ddruid.indexer.strategy=ec2
|
||||||
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
||||||
-Ddruid.indexer.logs.s3bucket=some_bucket
|
-Ddruid.indexer.logs.s3bucket=some_bucket
|
||||||
-Ddruid.indexer.logs.s3prefix=some_prefix
|
-Ddruid.indexer.logs.s3prefix=some_prefix
|
||||||
|
```
|
||||||
|
|
||||||
The indexing service requires some additional Zookeeper configs.
|
The indexing service requires some additional Zookeeper configs.
|
||||||
|
|
||||||
|
@ -128,7 +143,7 @@ There’s several additional configs that are required to run tasks.
|
||||||
|
|
||||||
|Property|Description|Default|
|
|Property|Description|Default|
|
||||||
|--------|-----------|-------|
|
|--------|-----------|-------|
|
||||||
|`druid.indexer.runner`|Indicates whether tasks should be run locally or in a distributed environment. “local” or “remote”.|local|
|
|`druid.indexer.runner`|Indicates whether tasks should be run locally or in a distributed environment. "local" or "remote".|local|
|
||||||
|`druid.indexer.taskDir`|Intermediate temporary directory that tasks may use.|none|
|
|`druid.indexer.taskDir`|Intermediate temporary directory that tasks may use.|none|
|
||||||
|`druid.indexer.configTable`|The MySQL config table where misc configs live.|none|
|
|`druid.indexer.configTable`|The MySQL config table where misc configs live.|none|
|
||||||
|`druid.indexer.strategy`|The autoscaling strategy to use.|noop|
|
|`druid.indexer.strategy`|The autoscaling strategy to use.|noop|
|
||||||
|
@ -140,7 +155,9 @@ There’s several additional configs that are required to run tasks.
|
||||||
|
|
||||||
The indexer console can be used to view pending tasks, running tasks, available workers, and recent worker creation and termination. The console can be accessed at:
|
The indexer console can be used to view pending tasks, running tasks, available workers, and recent worker creation and termination. The console can be accessed at:
|
||||||
|
|
||||||
http://<COORDINATOR_IP>:8080/static/console.html
|
```
|
||||||
|
http://<COORDINATOR_IP>:8080/static/console.html
|
||||||
|
```
|
||||||
|
|
||||||
Worker Node
|
Worker Node
|
||||||
-----------
|
-----------
|
||||||
|
@ -155,29 +172,31 @@ Worker nodes can be run using the `com.metamx.druid.indexing.worker.http.WorkerM
|
||||||
|
|
||||||
Worker nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
|
Worker nodes require [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration). In addition, there are several extra configurations that are required.
|
||||||
|
|
||||||
-Ddruid.worker.version=0
|
```
|
||||||
-Ddruid.worker.capacity=3
|
-Ddruid.worker.version=0
|
||||||
|
-Ddruid.worker.capacity=3
|
||||||
|
|
||||||
-Ddruid.indexer.threads=3
|
-Ddruid.indexer.threads=3
|
||||||
-Ddruid.indexer.taskDir=/mnt/persistent/task/
|
-Ddruid.indexer.taskDir=/mnt/persistent/task/
|
||||||
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
-Ddruid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
||||||
|
|
||||||
-Ddruid.worker.masterService=druid:sample_cluster:indexer
|
-Ddruid.worker.masterService=druid:sample_cluster:indexer
|
||||||
|
|
||||||
-Ddruid.indexer.fork.hostpattern=<IP>:%d
|
-Ddruid.indexer.fork.hostpattern=<IP>:%d
|
||||||
-Ddruid.indexer.fork.startport=8080
|
-Ddruid.indexer.fork.startport=8080
|
||||||
-Ddruid.indexer.fork.main=com.metamx.druid.indexing.worker.executor.ExecutorMain
|
-Ddruid.indexer.fork.main=com.metamx.druid.indexing.worker.executor.ExecutorMain
|
||||||
-Ddruid.indexer.fork.opts="-server -Xmx1g -Xms1g -XX:NewSize=256m -XX:MaxNewSize=256m"
|
-Ddruid.indexer.fork.opts="-server -Xmx1g -Xms1g -XX:NewSize=256m -XX:MaxNewSize=256m"
|
||||||
-Ddruid.indexer.fork.property.druid.service=druid/sample_cluster/executor
|
-Ddruid.indexer.fork.property.druid.service=druid/sample_cluster/executor
|
||||||
|
|
||||||
# These configs are the same configs you would set for basic service configuration, just with a different prefix
|
# These configs are the same configs you would set for basic service configuration, just with a different prefix
|
||||||
-Ddruid.indexer.fork.property.druid.monitoring.monitorSystem=false
|
-Ddruid.indexer.fork.property.druid.monitoring.monitorSystem=false
|
||||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
|
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
|
||||||
-Ddruid.indexer.fork.property.druid.indexer.taskDir=/mnt/persistent/task/
|
-Ddruid.indexer.fork.property.druid.indexer.taskDir=/mnt/persistent/task/
|
||||||
-Ddruid.indexer.fork.property.druid.processing.formatString=processing-%s
|
-Ddruid.indexer.fork.property.druid.processing.formatString=processing-%s
|
||||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||||
-Ddruid.indexer.fork.property.druid.server.maxSize=0
|
-Ddruid.indexer.fork.property.druid.server.maxSize=0
|
||||||
-Ddruid.indexer.fork.property.druid.request.logging.dir=request_logs/
|
-Ddruid.indexer.fork.property.druid.request.logging.dir=request_logs/
|
||||||
|
```
|
||||||
|
|
||||||
Many of the configurations for workers are similar to those for [basic service configuration](https://github.com/metamx/druid/wiki/Configuration#basic-service-configuration), but with a different config prefix. Below we describe the unique worker configs.
||||||
|
|
||||||
|
|
|
@ -1,9 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
### R
|
|
||||||
|
|
||||||
- [RDruid](https://github.com/metamx/RDruid) - Druid connector for R
|
|
||||||
|
|
||||||
Community Libraries
|
Community Libraries
|
||||||
-------------------
|
-------------------
|
||||||
|
@ -11,13 +8,18 @@ Community Libraries
|
||||||
Some great folks have written their own libraries to interact with Druid
|
Some great folks have written their own libraries to interact with Druid
|
||||||
|
|
||||||
#### Ruby
|
#### Ruby
|
||||||
\* [madvertise/ruby-druid](https://github.com/madvertise/ruby-druid) - A ruby client for Druid
|
|
||||||
|
* [madvertise/ruby-druid](https://github.com/madvertise/ruby-druid) - A ruby client for Druid
|
||||||
|
|
||||||
#### Python
|
#### Python
|
||||||
\* [metamx/pydruid](https://github.com/metamx/pydruid) - A python client for Druid
|
|
||||||
|
* [metamx/pydruid](https://github.com/metamx/pydruid) - A python client for Druid
|
||||||
|
|
||||||
|
#### R
|
||||||
|
|
||||||
|
- [RDruid](https://github.com/metamx/RDruid) - Druid connector for R
|
||||||
|
|
||||||
#### Helper Libraries
|
#### Helper Libraries
|
||||||
|
|
||||||
- [madvertise/druid-dumbo](https://github.com/madvertise/druid-dumbo) - Scripts to help generate batch configs for the ingestion of data into Druid
|
* [madvertise/druid-dumbo](https://github.com/madvertise/druid-dumbo) - Scripts to help generate batch configs for the ingestion of data into Druid
|
||||||
|
* [housejester/druid-test-harness](https://github.com/housejester/druid-test-harness) - A set of scripts to simplify standing up some servers and seeing how things work
|
||||||
- [housejester/druid-test-harness](https://github.com/housejester/druid-test-harness) - A set of scripts to simplify standing up some servers and seeing how things work
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Once you have a realtime node working, it is time to load your own data to see how Druid performs.
|
Once you have a realtime node working, it is time to load your own data to see how Druid performs.
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ mkdir config/broker
|
||||||
|
|
||||||
## Loading Data with Kafka ##
|
## Loading Data with Kafka ##
|
||||||
|
|
||||||
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/master/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in realtime without writing any code. To load data to a realtime node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
|
[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in realtime without writing any code. To load data to a realtime node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node.
|
||||||
|
|
||||||
### Booting Kafka ###
|
### Booting Kafka ###
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ Instructions for booting a Zookeeper and then Kafka cluster are available [here]
|
||||||
|
|
||||||
1. Create a valid configuration file, similar to the one below, called config/realtime/runtime.properties:
||||||
|
|
||||||
```
|
```properties
|
||||||
druid.host=0.0.0.0:8080
|
druid.host=0.0.0.0:8080
|
||||||
druid.port=8080
|
druid.port=8080
|
||||||
|
|
||||||
|
@ -91,7 +91,6 @@ Instructions for booting a Zookeeper and then Kafka cluster are available [here]
|
||||||
druid.database.password=diurd
|
druid.database.password=diurd
|
||||||
druid.database.connectURI=
|
druid.database.connectURI=
|
||||||
druid.host=127.0.0.1:8080
|
druid.host=127.0.0.1:8080
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Create a valid realtime configuration file, similar to the one below, called realtime.spec:
||||||
|
@ -239,7 +238,7 @@ If you've already setup a realtime node, be aware that although you can run mult
|
||||||
|
|
||||||
1. Setup a configuration file called config/master/runtime.properties similar to:
|
1. Setup a configuration file called config/master/runtime.properties similar to:
|
||||||
|
|
||||||
```bash
|
```properties
|
||||||
druid.host=0.0.0.0:8081
|
druid.host=0.0.0.0:8081
|
||||||
druid.port=8081
|
druid.port=8081
|
||||||
|
|
||||||
|
@ -294,7 +293,7 @@ If you've already setup a realtime node, be aware that although you can run mult
|
||||||
|
|
||||||
1. Create a configuration file in config/compute/runtime.properties similar to:
|
1. Create a configuration file in config/compute/runtime.properties similar to:
|
||||||
|
|
||||||
```bash
|
```properties
|
||||||
druid.host=0.0.0.0:8082
|
druid.host=0.0.0.0:8082
|
||||||
druid.port=8082
|
druid.port=8082
|
||||||
|
|
||||||
|
@ -404,7 +403,9 @@ Now its time to run the Hadoop [Batch-ingestion](Batch-ingestion.html) job, Hado
|
||||||
2. Now run the job, with the config pointing at batchConfig.json:
|
2. Now run the job, with the config pointing at batchConfig.json:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Ddruid.realtime.specFile=realtime.spec -classpath lib/* com.metamx.druid.indexer.HadoopDruidIndexerMain batchConfig.json
|
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
||||||
|
-Ddruid.realtime.specFile=realtime.spec -classpath lib/* \
|
||||||
|
com.metamx.druid.indexer.HadoopDruidIndexerMain batchConfig.json
|
||||||
```
|
```
|
||||||
|
|
||||||
You can now move on to [Querying Your Data](Querying-Your-Data.html)!
|
You can now move on to [Querying Your Data](Querying-Your-Data.html)!
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Master
|
Master
|
||||||
======
|
======
|
||||||
|
@ -39,37 +39,67 @@ The master node exposes several HTTP endpoints for interactions.
|
||||||
|
|
||||||
### GET
|
### GET
|
||||||
|
|
||||||
/info/master - returns the current true master of the cluster as a JSON object. E.g. A GET request to <IP>:8080/info/master will yield JSON of the form {[host]("IP"})
|
* `/info/master`
|
||||||
|
|
||||||
/info/cluster - returns JSON data about every node and segment in the cluster. E.g. A GET request to <IP>:8080/info/cluster will yield JSON data organized by nodes. Information about each node and each segment on each node will be returned.
|
Returns the current true master of the cluster as a JSON object.
|
||||||
|
|
||||||
/info/servers (optional param ?full) - returns all segments in the cluster if the full flag is not set, otherwise returns full metadata about all servers in the cluster
|
* `/info/cluster`
|
||||||
|
|
||||||
/info/servers/{serverName} - returns full metadata about a specific server
|
Returns JSON data about every node and segment in the cluster. Information about each node and each segment on each node will be returned.
|
||||||
|
|
||||||
/info/servers/{serverName}/segments (optional param ?full) - returns a list of all segments for a server if the full flag is not set, otherwise returns all segment metadata
|
* `/info/servers`
|
||||||
|
|
||||||
/info/servers/{serverName}/segments/{segmentId} - returns full metadata for a specific segment
|
Returns information about servers in the cluster. Set the `?full` query parameter to get full metadata about all servers and their segments in the cluster.
|
||||||
|
|
||||||
/info/segments (optional param ?full)- returns all segments in the cluster as a list if the full flag is not set, otherwise returns all metadata about segments in the cluster
|
* `/info/servers/{serverName}`
|
||||||
|
|
||||||
/info/segments/{segmentId} - returns full metadata for a specific segment
|
Returns full metadata about a specific server.
|
||||||
|
|
||||||
/info/datasources (optional param ?full) - returns a list of datasources in the cluster if the full flag is not set, otherwise returns all the metadata for every datasource in the cluster
|
* `/info/servers/{serverName}/segments`
|
||||||
|
|
||||||
/info/datasources/{dataSourceName} - returns full metadata for a datasource
|
Returns a list of all segments for a server. Set the `?full` query parameter to get all segment metadata included
|
||||||
|
|
||||||
/info/datasources/{dataSourceName}/segments (optional param ?full) - returns a list of all segments for a datasource if the full flag is not set, otherwise returns full segment metadata for a datasource
|
* `/info/servers/{serverName}/segments/{segmentId}`
|
||||||
|
|
||||||
/info/datasources/{dataSourceName}/segments/{segmentId} - returns full segment metadata for a specific segment
|
Returns full metadata for a specific segment.
|
||||||
|
|
||||||
/info/rules - returns all rules for all data sources in the cluster including the default datasource.
|
* `/info/segments`
|
||||||
|
|
||||||
/info/rules/{dataSourceName} - returns all rules for a specified datasource
|
Returns all segments in the cluster as a list. Set the `?full` flag to get all metadata about segments in the cluster
|
||||||
|
|
||||||
|
* `/info/segments/{segmentId}`
|
||||||
|
|
||||||
|
Returns full metadata for a specific segment
|
||||||
|
|
||||||
|
* `/info/datasources`
|
||||||
|
|
||||||
|
Returns a list of datasources in the cluster. Set the `?full` flag to get all metadata for every datasource in the cluster
|
||||||
|
|
||||||
|
* `/info/datasources/{dataSourceName}`
|
||||||
|
|
||||||
|
Returns full metadata for a datasource
|
||||||
|
|
||||||
|
* `/info/datasources/{dataSourceName}/segments`
|
||||||
|
|
||||||
|
Returns a list of all segments for a datasource. Set the `?full` flag to get full segment metadata for a datasource
|
||||||
|
|
||||||
|
* `/info/datasources/{dataSourceName}/segments/{segmentId}`
|
||||||
|
|
||||||
|
Returns full segment metadata for a specific segment
|
||||||
|
|
||||||
|
* `/info/rules`
|
||||||
|
|
||||||
|
Returns all rules for all data sources in the cluster including the default datasource.
|
||||||
|
|
||||||
|
* `/info/rules/{dataSourceName}`
|
||||||
|
|
||||||
|
Returns all rules for a specified datasource
|
||||||
|
|
||||||
### POST
|
### POST
|
||||||
|
|
||||||
/info/rules/{dataSourceName} - POST with a list of rules in JSON form to update rules.
|
* `/info/rules/{dataSourceName}`
|
||||||
|
|
||||||
|
POST with a list of rules in JSON form to update rules; see the sketch below.
||||||
|
|
||||||
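As a sketch, assuming the master is reachable at `<MASTER_IP>:<port>` and the new rules are stored in a local file named `rules.json` (both placeholders):

```bash
# Read the current rules for a datasource
curl "http://<MASTER_IP>:<port>/info/rules/{dataSourceName}"

# Update the rules for a datasource with a JSON list of rules
curl -X POST "http://<MASTER_IP>:<port>/info/rules/{dataSourceName}" \
  -H 'content-type: application/json' -d @rules.json
```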
The Master Console
|
The Master Console
|
||||||
------------------
|
------------------
|
||||||
|
@ -83,17 +113,17 @@ FAQ
|
||||||
|
|
||||||
1. **Do clients ever contact the master node?**
|
1. **Do clients ever contact the master node?**
|
||||||
|
|
||||||
The master is not involved in the lifecycle of a query.
|
The master is not involved in a query.
|
||||||
|
|
||||||
Compute nodes never directly contact the master node. The Druid master tells the compute nodes to load/drop data via Zookeeper, but the compute nodes are completely unaware of the master.
|
Compute nodes never directly contact the master node. The Druid master tells the compute nodes to load/drop data via Zookeeper, but the compute nodes are completely unaware of the master.
|
||||||
|
|
||||||
Brokers also never contact the master. Brokers base their understanding of the data topology on metadata exposed by the compute nodes via ZK and are completely unaware of the master.
|
Brokers also never contact the master. Brokers base their understanding of the data topology on metadata exposed by the compute nodes via ZK and are completely unaware of the master.
|
||||||
|
|
||||||
2. **Does it matter if the master node starts up before or after other processes?**
|
2. **Does it matter if the master node starts up before or after other processes?**
|
||||||
|
|
||||||
No. If the Druid master is not started up, no new segments will be loaded in the cluster and outdated segments will not be dropped. However, the master node can be started up at any time, and after a configurable delay, will start running master tasks.
|
No. If the Druid master is not started up, no new segments will be loaded in the cluster and outdated segments will not be dropped. However, the master node can be started up at any time, and after a configurable delay, will start running master tasks.
|
||||||
|
|
||||||
This also means that if you have a working cluster and all of your masters die, the cluster will continue to function; it just won’t experience any changes to its data topology.
||||||
|
|
||||||
Running
|
Running
|
||||||
-------
|
-------
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
MySQL is an external dependency of Druid. We use it to store various metadata about the system, but not to store the actual data. There are a number of tables used for various purposes described below.
|
MySQL is an external dependency of Druid. We use it to store various metadata about the system, but not to store the actual data. There are a number of tables used for various purposes described below.
|
||||||
|
|
||||||
|
@ -10,24 +10,26 @@ This is dictated by the `druid.database.segmentTable` property (Note that these
|
||||||
|
|
||||||
This table stores metadata about the segments that are available in the system. The table is polled by the [Master](Master.html) to determine the set of segments that should be available for querying in the system. The table has two main functional columns, the other columns are for indexing purposes.
|
This table stores metadata about the segments that are available in the system. The table is polled by the [Master](Master.html) to determine the set of segments that should be available for querying in the system. The table has two main functional columns, the other columns are for indexing purposes.
|
||||||
|
|
||||||
The `used` column is a boolean “tombstone”. A 1 means that the segment should be “used” by the cluster (i.e. it should be loaded and available for requests). A 0 means that the segment should not be actively loaded into the cluster. We do this as a means of removing segments from the cluster without actually removing their metadata (which allows for simpler rolling back if that is ever an issue).
|
The `used` column is a boolean "tombstone". A 1 means that the segment should be "used" by the cluster (i.e. it should be loaded and available for requests). A 0 means that the segment should not be actively loaded into the cluster. We do this as a means of removing segments from the cluster without actually removing their metadata (which allows for simpler rolling back if that is ever an issue).
|
||||||
|
|
||||||
The `payload` column stores a JSON blob that has all of the metadata for the segment (some of the data stored in this payload is redundant with some of the columns in the table, that is intentional). This looks something like
|
The `payload` column stores a JSON blob that has all of the metadata for the segment (some of the data stored in this payload is redundant with some of the columns in the table, that is intentional). This looks something like
|
||||||
|
|
||||||
{
|
```
|
||||||
"dataSource":"wikipedia",
|
{
|
||||||
"interval":"2012-05-23T00:00:00.000Z/2012-05-24T00:00:00.000Z",
|
"dataSource":"wikipedia",
|
||||||
"version":"2012-05-24T00:10:00.046Z",
|
"interval":"2012-05-23T00:00:00.000Z/2012-05-24T00:00:00.000Z",
|
||||||
"loadSpec":{"type":"s3_zip",
|
"version":"2012-05-24T00:10:00.046Z",
|
||||||
"bucket":"bucket_for_segment",
|
"loadSpec":{"type":"s3_zip",
|
||||||
"key":"path/to/segment/on/s3"},
|
"bucket":"bucket_for_segment",
|
||||||
"dimensions":"comma-delimited-list-of-dimension-names",
|
"key":"path/to/segment/on/s3"},
|
||||||
"metrics":"comma-delimited-list-of-metric-names",
|
"dimensions":"comma-delimited-list-of-dimension-names",
|
||||||
"shardSpec":{"type":"none"},
|
"metrics":"comma-delimited-list-of-metric-names",
|
||||||
"binaryVersion":9,
|
"shardSpec":{"type":"none"},
|
||||||
"size":size_of_segment,
|
"binaryVersion":9,
|
||||||
"identifier":"wikipedia_2012-05-23T00:00:00.000Z_2012-05-24T00:00:00.000Z_2012-05-23T00:10:00.046Z"
|
"size":size_of_segment,
|
||||||
}
|
"identifier":"wikipedia_2012-05-23T00:00:00.000Z_2012-05-24T00:00:00.000Z_2012-05-23T00:10:00.046Z"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Note that the format of this blob can and will change from time-to-time.
|
Note that the format of this blob can and will change from time-to-time.
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
The orderBy field provides the functionality to sort and limit the set of results from a groupBy query. Available options are:
|
The orderBy field provides the functionality to sort and limit the set of results from a groupBy query. Available options are:
|
||||||
|
|
||||||
|
@ -7,21 +7,21 @@ The orderBy field provides the functionality to sort and limit the set of result
|
||||||
|
|
||||||
The default limit spec takes a limit and the list of columns to do an orderBy operation over. The grammar is:
|
The default limit spec takes a limit and the list of columns to do an orderBy operation over. The grammar is:
|
||||||
|
|
||||||
<code>
|
```json
|
||||||
{
|
{
|
||||||
"type" : "default",
|
"type" : "default",
|
||||||
"limit" : <integer_value>,
|
"limit" : <integer_value>,
|
||||||
"columns" : [list of OrderByColumnSpec],
|
"columns" : [list of OrderByColumnSpec],
|
||||||
}
|
}
|
||||||
</code>
|
```
|
||||||
|
|
||||||
#### OrderByColumnSpec
|
#### OrderByColumnSpec
|
||||||
|
|
||||||
OrderByColumnSpecs indicate how to do order by operations. Each order by condition can be a `String` or a map of the following form:
||||||
|
|
||||||
<code>
|
```json
|
||||||
{
|
{
|
||||||
"dimension" : "<Any dimension or metric>",
|
"dimension" : <Any dimension or metric>,
|
||||||
"direction" : "ASCENDING OR DESCENDING"
|
"direction" : "ASCENDING OR DESCENDING"
|
||||||
}
|
}
|
||||||
</code>
|
```
|
||||||
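Putting the two together, a complete limit spec might look like the following sketch (the dimension and metric names are hypothetical):

```json
{
    "type" : "default",
    "limit" : 5,
    "columns" : [
        "country",
        { "dimension" : "total_count", "direction" : "DESCENDING" }
    ]
}
```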
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
The Plumber handles generated segments, both while they are being generated and when they are "done". This is also technically a pluggable interface and there are multiple implementations, but the plumber handles enough details that only a few implementations are expected and only more advanced third parties will implement their own.
||||||
|
|
||||||
|Field|Type|Description|Required|
|
|Field|Type|Description|Required|
|
||||||
|-----|----|-----------|--------|
|
|-----|----|-----------|--------|
|
||||||
|
@ -9,8 +9,8 @@ The Plumber is the thing that handles generated segments both while they are bei
|
||||||
|
|
||||||
We provide a brief description of the example to exemplify the types of things that are configured on the plumber.
|
We provide a brief description of the example to exemplify the types of things that are configured on the plumber.
|
||||||
|
|
||||||
* `windowPeriod` is the amount of lag time to allow events. This is configured with a 10 minute window, meaning that any event more than 10 minutes ago will be thrown away and not included in the segment generated by the realtime server.
* `basePersistDirectory` is the directory to put things that need persistence. The plumber is responsible for the actual intermediate persists and this tells it where to store those persists. A small sketch of these fields follows this list.
||||||
|
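A minimal sketch of how these two fields might appear in the plumber section of a realtime spec; the `"type" : "realtime"` value and the directory path are assumptions for illustration:

```json
"plumber" : {
    "type" : "realtime",
    "windowPeriod" : "PT10m",
    "basePersistDirectory" : "/tmp/realtime/basePersist"
}
```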
|
||||||
Available Plumbers
|
Available Plumbers
|
||||||
------------------
|
------------------
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
Post-aggregations are specifications of processing that should happen on aggregated values as they come out of Druid. If you include a post aggregation as part of a query, make sure to include all aggregators the post-aggregator requires.
|
Post-aggregations are specifications of processing that should happen on aggregated values as they come out of Druid. If you include a post aggregation as part of a query, make sure to include all aggregators the post-aggregator requires.
|
||||||
|
|
||||||
|
@ -13,83 +13,63 @@ Supported functions are `+`, `-`, `*`, and `/`
|
||||||
|
|
||||||
The grammar for an arithmetic post aggregation is:
|
The grammar for an arithmetic post aggregation is:
|
||||||
|
|
||||||
<code>postAggregation : {
|
```json
|
||||||
"type" : "arithmetic",
|
postAggregation : {
|
||||||
"name" : <output_name>,
|
"type" : "arithmetic",
|
||||||
"fn" : <arithmetic_function>,
|
"name" : <output_name>,
|
||||||
"fields": [<post_aggregator>, <post_aggregator>, ...]
|
"fn" : <arithmetic_function>,
|
||||||
}</code>
|
"fields": [<post_aggregator>, <post_aggregator>, ...]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Field accessor post-aggregator
|
### Field accessor post-aggregator
|
||||||
|
|
||||||
This returns the value produced by the specified [aggregator|Aggregations](aggregator|Aggregations.html).
|
This returns the value produced by the specified [aggregator](Aggregations.html).
|
||||||
|
|
||||||
`fieldName` refers to the output name of the aggregator given in the [aggregations|Aggregations](aggregations|Aggregations.html) portion of the query.
|
`fieldName` refers to the output name of the aggregator given in the [aggregations](Aggregations.html) portion of the query.
|
||||||
|
|
||||||
<code>field_accessor : {
|
```json
|
||||||
"type" : "fieldAccess",
|
{ "type" : "fieldAccess", "fieldName" : <aggregator_name> }
|
||||||
"fieldName" : <aggregator_name>
|
```
|
||||||
}</code>
|
|
||||||
|
|
||||||
### Constant post-aggregator
|
### Constant post-aggregator
|
||||||
|
|
||||||
The constant post-aggregator always returns the specified value.
|
The constant post-aggregator always returns the specified value.
|
||||||
|
|
||||||
<code>constant : {
|
```json
|
||||||
"type" : "constant",
|
{ "type" : "constant", "name" : <output_name>, "value" : <numerical_value> }
|
||||||
"name" : <output_name>,
|
```
|
||||||
"value" : <numerical_value>,
|
|
||||||
}</code>
|
|
||||||
|
|
||||||
### Example Usage
|
### Example Usage
|
||||||
|
|
||||||
In this example, let’s calculate a simple percentage using post aggregators. Let’s imagine our data set has a metric called “total”.
|
In this example, let’s calculate a simple percentage using post aggregators. Let’s imagine our data set has a metric called "total".
|
||||||
|
|
||||||
The format of the query JSON is as follows:
|
The format of the query JSON is as follows:
|
||||||
|
|
||||||
<code>
|
```json
|
||||||
{
|
{
|
||||||
...
|
...
|
||||||
"aggregations" : [
|
"aggregations" : [
|
||||||
{
|
{ "type" : "count", "name" : "rows" },
|
||||||
"type" : "count",
|
{ "type" : "doubleSum", "name" : "tot", "fieldName" : "total" }
|
||||||
"name" : "rows"
|
],
|
||||||
},
|
"postAggregations" : {
|
||||||
{
|
"type" : "arithmetic",
|
||||||
"type" : "doubleSum",
|
"name" : "average",
|
||||||
"name" : "tot",
|
"fn" : "*",
|
||||||
"fieldName" : "total"
|
"fields" : [
|
||||||
}
|
{ "type" : "arithmetic",
|
||||||
],
|
"name" : "div",
|
||||||
"postAggregations" : {
|
"fn" : "/",
|
||||||
"type" : "arithmetic",
|
"fields" : [
|
||||||
"name" : "average",
|
{ "type" : "fieldAccess", "name" : "tot", "fieldName" : "tot" },
|
||||||
"fn" : "*",
|
{ "type" : "fieldAccess", "name" : "rows", "fieldName" : "rows" }
|
||||||
"fields" : [
|
]
|
||||||
{
|
},
|
||||||
"type" : "arithmetic",
|
{ "type" : "constant", "name": "const", "value" : 100 }
|
||||||
"name" : "div",
|
]
|
||||||
"fn" : "/",
|
}
|
||||||
"fields" : [
|
...
|
||||||
{
|
}
|
||||||
"type" : "fieldAccess",
|
|
||||||
"name" : "tot",
|
```
|
||||||
"fieldName" : "tot"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "fieldAccess",
|
|
||||||
"name" : "rows",
|
|
||||||
"fieldName" : "rows"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "constant",
|
|
||||||
"name": "const",
|
|
||||||
"value" : 100
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
...
|
|
||||||
}
|
|
||||||
</code>
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: doc_page
|
||||||
---
|
---
|
||||||
# Setup #
|
# Setup #
|
||||||
|
|
||||||
|
@ -8,93 +8,100 @@ Before we start querying druid, we're going to finish setting up a complete clus
|
||||||
## Booting a Broker Node ##
|
## Booting a Broker Node ##
|
||||||
|
|
||||||
1. Setup a config file at config/broker/runtime.properties that looks like this:
|
1. Setup a config file at config/broker/runtime.properties that looks like this:
|
||||||
```
|
|
||||||
druid.host=0.0.0.0:8083
|
```
|
||||||
druid.port=8083
|
druid.host=0.0.0.0:8083
|
||||||
|
druid.port=8083
|
||||||
|
|
||||||
com.metamx.emitter.logging=true
|
com.metamx.emitter.logging=true
|
||||||
|
|
||||||
druid.processing.formatString=processing_%s
|
druid.processing.formatString=processing_%s
|
||||||
druid.processing.numThreads=1
|
druid.processing.numThreads=1
|
||||||
druid.processing.buffer.sizeBytes=10000000
|
druid.processing.buffer.sizeBytes=10000000
|
||||||
|
|
||||||
#emitting, opaque marker
|
#emitting, opaque marker
|
||||||
druid.service=example
|
druid.service=example
|
||||||
|
|
||||||
druid.request.logging.dir=/tmp/example/log
|
druid.request.logging.dir=/tmp/example/log
|
||||||
druid.realtime.specFile=realtime.spec
|
druid.realtime.specFile=realtime.spec
|
||||||
com.metamx.emitter.logging=true
|
com.metamx.emitter.logging=true
|
||||||
com.metamx.emitter.logging.level=debug
|
com.metamx.emitter.logging.level=debug
|
||||||
|
|
||||||
# below are dummy values when operating a realtime only node
|
# below are dummy values when operating a realtime only node
|
||||||
druid.processing.numThreads=3
|
druid.processing.numThreads=3
|
||||||
|
|
||||||
com.metamx.aws.accessKey=dummy_access_key
|
com.metamx.aws.accessKey=dummy_access_key
|
||||||
com.metamx.aws.secretKey=dummy_secret_key
|
com.metamx.aws.secretKey=dummy_secret_key
|
||||||
druid.pusher.s3.bucket=dummy_s3_bucket
|
druid.pusher.s3.bucket=dummy_s3_bucket
|
||||||
|
|
||||||
druid.zk.service.host=localhost
|
druid.zk.service.host=localhost
|
||||||
druid.server.maxSize=300000000000
|
druid.server.maxSize=300000000000
|
||||||
druid.zk.paths.base=/druid
|
druid.zk.paths.base=/druid
|
||||||
druid.database.segmentTable=prod_segments
|
druid.database.segmentTable=prod_segments
|
||||||
druid.database.user=druid
|
druid.database.user=druid
|
||||||
druid.database.password=diurd
|
druid.database.password=diurd
|
||||||
druid.database.connectURI=jdbc:mysql://localhost:3306/druid
|
druid.database.connectURI=jdbc:mysql://localhost:3306/druid
|
||||||
druid.zk.paths.discoveryPath=/druid/discoveryPath
|
druid.zk.paths.discoveryPath=/druid/discoveryPath
|
||||||
druid.database.ruleTable=rules
|
druid.database.ruleTable=rules
|
||||||
druid.database.configTable=config
|
druid.database.configTable=config
|
||||||
|
|
||||||
# Path on local FS for storage of segments; dir will be created if needed
|
# Path on local FS for storage of segments; dir will be created if needed
|
||||||
druid.paths.indexCache=/tmp/druid/indexCache
|
druid.paths.indexCache=/tmp/druid/indexCache
|
||||||
# Path on local FS for storage of segment metadata; dir will be created if needed
|
# Path on local FS for storage of segment metadata; dir will be created if needed
|
||||||
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
|
druid.paths.segmentInfoCache=/tmp/druid/segmentInfoCache
|
||||||
druid.pusher.local.storageDirectory=/tmp/druid/localStorage
|
druid.pusher.local.storageDirectory=/tmp/druid/localStorage
|
||||||
druid.pusher.local=true
|
druid.pusher.local=true
|
||||||
|
|
||||||
# thread pool size for servicing queries
|
# thread pool size for servicing queries
|
||||||
druid.client.http.connections=30
|
druid.client.http.connections=30
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Run the broker node:
|
2. Run the broker node:
|
||||||
```bash
|
|
||||||
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
```bash
|
||||||
-Ddruid.realtime.specFile=realtime.spec \
|
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
||||||
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/broker \
|
-Ddruid.realtime.specFile=realtime.spec \
|
||||||
com.metamx.druid.http.BrokerMain
|
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/broker \
|
||||||
```
|
com.metamx.druid.http.BrokerMain
|
||||||
|
```
|
||||||
|
|
||||||
## Booting a Master Node ##
|
## Booting a Master Node ##
|
||||||
|
|
||||||
1. Setup a config file at config/master/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818870](https://gist.github.com/rjurney/5818870)
|
1. Setup a config file at config/master/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818870](https://gist.github.com/rjurney/5818870)
|
||||||
|
|
||||||
2. Run the master node:
|
2. Run the master node:
|
||||||
```bash
|
|
||||||
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
```bash
|
||||||
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/master \
|
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
||||||
com.metamx.druid.http.MasterMain
|
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/master \
|
||||||
```
|
com.metamx.druid.http.MasterMain
|
||||||
|
```
|
||||||
|
|
||||||
## Booting a Realtime Node ##
|
## Booting a Realtime Node ##
|
||||||
|
|
||||||
1. Setup a config file at config/realtime/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818774](https://gist.github.com/rjurney/5818774)
|
1. Setup a config file at config/realtime/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818774](https://gist.github.com/rjurney/5818774)
|
||||||
|
|
||||||
2. Setup a realtime.spec file like this: [https://gist.github.com/rjurney/5818779](https://gist.github.com/rjurney/5818779)
|
2. Setup a realtime.spec file like this: [https://gist.github.com/rjurney/5818779](https://gist.github.com/rjurney/5818779)
|
||||||
|
|
||||||
3. Run the realtime node:
|
3. Run the realtime node:
|
||||||
```bash
|
|
||||||
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
```bash
|
||||||
-Ddruid.realtime.specFile=realtime.spec \
|
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
||||||
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/realtime \
|
-Ddruid.realtime.specFile=realtime.spec \
|
||||||
com.metamx.druid.realtime.RealtimeMain
|
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/realtime \
|
||||||
```
|
com.metamx.druid.realtime.RealtimeMain
|
||||||
|
```
|
||||||
|
|
||||||
## Booting a Compute Node ##
|
## Booting a Compute Node ##
|
||||||
|
|
||||||
1. Setup a config file at config/compute/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818885](https://gist.github.com/rjurney/5818885)
|
1. Setup a config file at config/compute/runtime.properties that looks like this: [https://gist.github.com/rjurney/5818885](https://gist.github.com/rjurney/5818885)
|
||||||
2. Run the compute node:
|
2. Run the compute node:
|
||||||
```bash
|
|
||||||
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
```bash
|
||||||
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/compute \
|
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
|
||||||
com.metamx.druid.http.ComputeMain
|
-classpath services/target/druid-services-0.5.50-SNAPSHOT-selfcontained.jar:config/compute \
|
||||||
```
|
com.metamx.druid.http.ComputeMain
|
||||||
|
```
|
||||||
|
|
||||||
# Querying Your Data #
|
# Querying Your Data #
|
||||||
|
|
||||||
|
@ -107,6 +114,7 @@ As a shared-nothing system, there are three ways to query druid, against the [Re
|
||||||
### Construct a Query ###
|
### Construct a Query ###
|
||||||
|
|
||||||
For constructing this query, see: Querying against the realtime.spec
|
For constructing this query, see: Querying against the realtime.spec
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"queryType": "groupBy",
|
"queryType": "groupBy",
|
||||||
|
@ -125,57 +133,52 @@ For constructing this query, see: Querying against the realtime.spec
|
||||||
### Querying the Realtime Node ###
|
### Querying the Realtime Node ###
|
||||||
|
|
||||||
Run our query against port 8080:
|
Run our query against port 8080:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST "http://localhost:8080/druid/v2/?pretty" \
|
curl -X POST "http://localhost:8080/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
|
||||||
-H 'content-type: application/json' -d @query.body
|
|
||||||
```
|
```
|
||||||
|
|
||||||
See our result:
|
See our result:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[ {
|
[ {
|
||||||
"version" : "v1",
|
"version" : "v1",
|
||||||
"timestamp" : "2010-01-01T00:00:00.000Z",
|
"timestamp" : "2010-01-01T00:00:00.000Z",
|
||||||
"event" : {
|
"event" : { "imps" : 5, "wp" : 15000.0, "rows" : 5 }
|
||||||
"imps" : 5,
|
|
||||||
"wp" : 15000.0,
|
|
||||||
"rows" : 5
|
|
||||||
}
|
|
||||||
} ]
|
} ]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Querying the Compute Node ###
|
### Querying the Compute Node ###
|
||||||
Run the query against port 8082:
|
Run the query against port 8082:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST "http://localhost:8082/druid/v2/?pretty" \
|
curl -X POST "http://localhost:8082/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
|
||||||
-H 'content-type: application/json' -d @query.body
|
|
||||||
```
|
```
|
||||||
|
|
||||||
And get (similar to):
|
And get (similar to):
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[ {
|
[ {
|
||||||
"version" : "v1",
|
"version" : "v1",
|
||||||
"timestamp" : "2010-01-01T00:00:00.000Z",
|
"timestamp" : "2010-01-01T00:00:00.000Z",
|
||||||
"event" : {
|
"event" : { "imps" : 27, "wp" : 77000.0, "rows" : 9 }
|
||||||
"imps" : 27,
|
|
||||||
"wp" : 77000.0,
|
|
||||||
"rows" : 9
|
|
||||||
}
|
|
||||||
} ]
|
} ]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Querying both Nodes via the Broker ###
|
### Querying both Nodes via the Broker ###
|
||||||
Run the query against port 8083:
|
Run the query against port 8083:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST "http://localhost:8083/druid/v2/?pretty" \
|
curl -X POST "http://localhost:8083/druid/v2/?pretty" -H 'content-type: application/json' -d @query.body
|
||||||
-H 'content-type: application/json' -d @query.body
|
|
||||||
```
|
```
|
||||||
|
|
||||||
And get:
|
And get:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
[ {
|
[ {
|
||||||
"version" : "v1",
|
"version" : "v1",
|
||||||
"timestamp" : "2010-01-01T00:00:00.000Z",
|
"timestamp" : "2010-01-01T00:00:00.000Z",
|
||||||
"event" : {
|
"event" : { "imps" : 5, "wp" : 15000.0, "rows" : 5 }
|
||||||
"imps" : 5,
|
|
||||||
"wp" : 15000.0,
|
|
||||||
"rows" : 5
|
|
||||||
}
|
|
||||||
} ]
|
} ]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -189,9 +192,9 @@ How are we to know what queries we can run? Although [Querying](Querying.html) i

[{
  "schema" : { "dataSource":"druidtest",
               "aggregators":[ {"type":"count", "name":"impressions"},
                               {"type":"doubleSum","name":"wp","fieldName":"wp"}],
               "indexGranularity":"minute",
               "shardSpec" : { "type": "none" } },
  "config" : { "maxRowsInMemory" : 500000,
               "intermediatePersistPeriod" : "PT10m" },
  "firehose" : { "type" : "kafka-0.7.2",

@@ -221,6 +224,7 @@ How are we to know what queries we can run? Although [Querying](Querying.html) i

```json
"dataSource":"druidtest"
```

Our dataSource tells us the name of the relation/table, or 'source of data', to query in both our realtime.spec and query.body!

### aggregations ###

@@ -239,7 +243,7 @@ this matches up to the aggregators in the schema of our realtime.spec!

```json
"aggregators":[ {"type":"count", "name":"impressions"},
                {"type":"doubleSum","name":"wp","fieldName":"wp"}],
```

### dimensions ###

@@ -277,48 +281,23 @@ Which gets us grouped data in return!

[ {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 1, "age" : "100", "wp" : 1000.0, "rows" : 1 }
}, {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 1, "age" : "20", "wp" : 3000.0, "rows" : 1 }
}, {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 1, "age" : "30", "wp" : 4000.0, "rows" : 1 }
}, {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 1, "age" : "40", "wp" : 5000.0, "rows" : 1 }
}, {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 1, "age" : "50", "wp" : 2000.0, "rows" : 1 }
} ]
```

@@ -331,11 +310,7 @@ Now that we've observed our dimensions, we can also filter:

  "queryType": "groupBy",
  "dataSource": "druidtest",
  "granularity": "all",
  "filter": { "type": "selector", "dimension": "gender", "value": "male" },
  "aggregations": [
    {"type": "count", "name": "rows"},
    {"type": "longSum", "name": "imps", "fieldName": "impressions"},

@@ -351,11 +326,7 @@ Which gets us just people aged 40:

[ {
  "version" : "v1",
  "timestamp" : "2010-01-01T00:00:00.000Z",
  "event" : { "imps" : 3, "wp" : 9000.0, "rows" : 3 }
} ]
```

@@ -363,4 +334,4 @@ Check out [Filters](Filters.html) for more.

## Learn More ##

You can learn more about querying at [Querying](Querying.html)! Now check out [Booting a production cluster](Booting-a-production-cluster.html)!

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Querying
========

@@ -8,86 +8,100 @@ Queries are made using an HTTP REST style request to a [Broker](Broker.html), [C

We start by describing an example query with additional comments that mention possible variations. Query operators are also summarized in a table below.

Example Query "rand"
--------------------

Here is the query in the examples/rand subproject (file is query.body), followed by a commented version of the same.

```javascript
{
  "queryType": "groupBy",
  "dataSource": "randSeq",
  "granularity": "all",
  "dimensions": [],
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "events", "name": "e" },
    { "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum" }
  ],
  "postAggregations": [{
    "type": "arithmetic",
    "name": "avg_random",
    "fn": "/",
    "fields": [
      { "type": "fieldAccess", "fieldName": "randomNumberSum" },
      { "type": "fieldAccess", "fieldName": "rows" }
    ]
  }],
  "intervals": ["2012-10-01T00:00/2020-01-01T00"]
}
```

This query could be submitted via curl like so (assuming the query object is in a file "query.json"):

```bash
curl -X POST "http://host:port/druid/v2/?pretty" -H 'content-type: application/json' -d @query.json
```

The "pretty" query parameter gets the results formatted a bit nicer.

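The same request can also be issued from code. Below is a minimal, hypothetical sketch that uses only the JDK's `HttpURLConnection`; it is not part of the Druid examples, and the host, port and file name are placeholders to adjust for your own setup.

```java
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class PostDruidQuery
{
  public static void main(String[] args) throws Exception
  {
    // Placeholder endpoint; point this at whichever node you are querying.
    URL url = new URL("http://localhost:8083/druid/v2/?pretty");
    byte[] body = Files.readAllBytes(Paths.get("query.json"));

    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setRequestProperty("Content-Type", "application/json");
    conn.setDoOutput(true);

    // Write the query object as the POST body.
    try (OutputStream out = conn.getOutputStream()) {
      out.write(body);
    }

    // Read and print the JSON response.
    try (InputStream in = conn.getInputStream()) {
      System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8));
    }
  }
}
```
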
Details of Example Query "rand"
-------------------------------

The queryType JSON field identifies which kind of query operator is to be used, in this case it is groupBy, the most frequently used kind (which corresponds to an internal implementation class GroupByQuery registered as "groupBy"), and it has a set of required fields that are also part of this query. The queryType can also be "search" or "timeBoundary", which have similar or different required fields, summarized below:

```javascript
{
  "queryType": "groupBy",
```

The dataSource JSON field shown next identifies where to apply the query. In this case, randSeq corresponds to the examples/rand/rand_realtime.spec file schema:

```javascript
  "dataSource": "randSeq",
```

The granularity JSON field specifies the bucket size for values. It could be a built-in time interval like "second", "minute", "fifteen_minute", "thirty_minute", "hour" or "day". It can also be an expression like `{"type": "period", "period":"PT6m"}` meaning "6 minute buckets". See [Granularities](Granularities.html) for more information on the different options for this field. In this example, it is set to the special value "all", which means bucket all data points together into the same time bucket:

```javascript
  "granularity": "all",
```

The dimensions JSON field value is an array of zero or more fields as defined in the dataSource spec file or defined in the input records and carried forward. These are used to constrain the grouping. If empty, then one value per time granularity bucket is requested in the groupBy:

```javascript
  "dimensions": [],
```

A groupBy also requires the JSON field "aggregations" (see [Aggregations](Aggregations.html)), which are applied to the column specified by fieldName, and the output of the aggregation will be named according to the value in the "name" field:

```javascript
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "events", "name": "e" },
    { "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum" }
  ],
```

You can also specify postAggregations, which are applied after data has been aggregated for the current granularity and dimensions bucket. See [Post Aggregations](Post Aggregations.html) for a detailed description. In the rand example, an arithmetic type operation (division, as specified by "fn") is performed with the result "name" of "avg_random". The "fields" field specifies the inputs from the aggregation stage to this expression. Note that identifiers corresponding to the "name" JSON field inside the type "fieldAccess" are required but not used outside this expression, so they are prefixed with "dummy" for clarity:

```javascript
  "postAggregations": [{
    "type": "arithmetic",
    "name": "avg_random",
    "fn": "/",
    "fields": [
      { "type": "fieldAccess", "fieldName": "randomNumberSum" },
      { "type": "fieldAccess", "fieldName": "rows" }
    ]
  }],
```

The intervals JSON field specifies the time range(s) of the query; data outside the specified intervals will not be used. This example specifies from October 1, 2012 until January 1, 2020:

```javascript
  "intervals": ["2012-10-01T00:00/2020-01-01T00"]
}
```

Query Operators
---------------

@@ -99,8 +113,8 @@ The following table summarizes query properties.

|timeseries, groupBy, search, timeBoundary|dataSource|query is applied to this data source|yes|
|timeseries, groupBy, search|intervals|range of time series to include in query|yes|
|timeseries, groupBy, search, timeBoundary|context|This is a key-value map that can allow the query to alter some of the behavior of a query. It is primarily used for debugging, for example if you include `"bySegment":true` in the map, you will get results associated with the data segment they came from.|no|
|timeseries, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|timeseries, groupBy, search|granularity|the timestamp granularity to bucket results into (e.g. "hour"). See [Granularities](Granularities.html) for more information.|no|
|groupBy|dimensions|constrains the groupings; if empty, then one value per time granularity bucket|yes|
|timeseries, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|timeseries, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post Aggregations.html).|yes|

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Realtime
========

@@ -23,38 +23,38 @@ Configuration

Realtime nodes take a mix of base server configuration and spec files that describe how to connect, process and expose the realtime feed. See [Configuration](Configuration.html) for information about general server configuration.

### Realtime "specFile"

The property `druid.realtime.specFile` has the path of a file (absolute or relative path and file name) with realtime specifications in it. This "specFile" should be a JSON Array of JSON objects like the following:

```json
[{
  "schema" : { "dataSource":"dataSourceName",
               "aggregators":[ {"type":"count", "name":"events"},
                               {"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
               "indexGranularity":"minute",
               "shardSpec" : { "type": "none" } },
  "config" : { "maxRowsInMemory" : 500000,
               "intermediatePersistPeriod" : "PT10m" },
  "firehose" : { "type" : "kafka-0.7.2",
                 "consumerProps" : { "zk.connect" : "zk_connect_string",
                                     "zk.connectiontimeout.ms" : "15000",
                                     "zk.sessiontimeout.ms" : "15000",
                                     "zk.synctime.ms" : "5000",
                                     "groupid" : "consumer-group",
                                     "fetch.size" : "1048586",
                                     "autooffset.reset" : "largest",
                                     "autocommit.enable" : "false" },
                 "feed" : "your_kafka_topic",
                 "parser" : { "timestampSpec" : { "column" : "timestamp", "format" : "iso" },
                              "data" : { "format" : "json" },
                              "dimensionExclusions" : ["value"] } },
  "plumber" : { "type" : "realtime",
                "windowPeriod" : "PT10m",
                "segmentGranularity":"hour",
                "basePersistDirectory" : "/tmp/realtime/basePersist" }
}]
```

This is a JSON Array, so you can give more than one realtime stream to a given node. The number you can put in the same process depends on the exact configuration. In general, it is best to think of each realtime stream handler as requiring two threads: one thread for data consumption and aggregation, and one thread for incremental persists and other background tasks.

@@ -68,7 +68,7 @@ This describes the data schema for the output Druid segment. More information ab

|-----|----|-----------|--------|
|aggregators|Array of Objects|The list of aggregators to use to aggregate colliding rows together.|yes|
|dataSource|String|The name of the dataSource that the segment belongs to.|yes|
|indexGranularity|String|The granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows.|yes|
|segmentGranularity|String|The granularity of the segment as a whole. This is generally larger than the index granularity and describes the rate at which the realtime server will push segments out for historical servers to take over.|yes|
|shardSpec|Object|This describes the shard that is represented by this server. This must be specified properly in order to have multiple realtime nodes indexing the same data stream in a sharded fashion.|no|

@@ -94,7 +94,8 @@ Constraints

The following table summarizes constraints between settings in the spec file for the Realtime subsystem.

|Name|Effect|Minimum|Recommended|
|----|------|-------|-----------|
|windowPeriod|when reading an InputRow, events with timestamp older than now minus this window are discarded|time jitter tolerance|use this to reject outliers|
|segmentGranularity|time granularity (minute, hour, day, week, month) for loading data at query time|equal to indexGranularity|more than indexGranularity|
|indexGranularity|time granularity (minute, hour, day, week, month) of indexes|less than segmentGranularity|minute, hour, day, week, month|

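As a rough illustration of the windowPeriod constraint above (this is not Druid's actual ingestion code, just a sketch of the described behavior using Joda-Time), an event is kept only if its timestamp is no older than now minus the window:

```java
import org.joda.time.DateTime;
import org.joda.time.Period;

public class WindowPeriodSketch
{
  public static void main(String[] args)
  {
    Period windowPeriod = Period.minutes(10);        // corresponds to "PT10m"
    DateTime now = new DateTime();
    DateTime eventTimestamp = now.minusMinutes(3);   // hypothetical incoming event

    // Events older than (now - windowPeriod) are discarded.
    boolean accepted = !eventTimestamp.isBefore(now.minus(windowPeriod));
    System.out.println("event accepted: " + accepted);
  }
}
```
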
@@ -115,8 +116,8 @@ Extending the code

Realtime integration is intended to be extended in two ways:

1. Connect to data streams from varied systems ([Firehose](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/firehose/FirehoseFactory.java))
2. Adjust the publishing strategy to match your needs ([Plumber](https://github.com/metamx/druid/blob/druid-0.5.x/realtime/src/main/java/com/metamx/druid/realtime/plumber/PlumberSchool.java))

The expectations are that the former will be very common and something that users of Druid will do on a fairly regular basis. Most users will probably never have to deal with the latter form of customization. Indeed, we hope that all potential use cases can be packaged up as part of Druid proper without requiring proprietary customization.

@@ -124,34 +125,34 @@ Given those expectations, adding a firehose is straightforward and completely en

We will do our best to accept contributions from the community of new Firehoses and Plumbers, but we also understand the requirement for being able to plug in your own proprietary implementations. The model for doing this is by embedding the Druid code in another project and writing your own `main()` method that initializes a RealtimeNode object and registers your proprietary objects with it.

```java
public class MyRealtimeMain
{
  private static final Logger log = new Logger(MyRealtimeMain.class);

  public static void main(String[] args) throws Exception
  {
    LogLevelAdjuster.register();

    Lifecycle lifecycle = new Lifecycle();

    lifecycle.addManagedInstance(
        RealtimeNode.builder()
                    .build()
                    .registerJacksonSubtype(foo.bar.MyFirehose.class)
    );

    try {
      lifecycle.start();
    }
    catch (Throwable t) {
      log.info(t, "Throwable caught at startup, committing seppuku");
      System.exit(2);
    }

    lifecycle.join();
  }
}
```

Pluggable pieces of the system are either handled by a setter on the RealtimeNode object, or they are configuration driven and need to be set up to allow for [Jackson polymorphic deserialization](http://wiki.fasterxml.com/JacksonPolymorphicDeserialization) and registered via the relevant methods on the RealtimeNode object.

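For example, a configuration-driven firehose is just a Jackson-deserializable class with a registered type name. The sketch below is hypothetical (the class, the "myFirehose" type name and the "topic" property are made up, and the FirehoseFactory methods are omitted); it only shows the Jackson side of the wiring, assuming Jackson 2 annotations:

```java
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;

// Hypothetical pluggable component: the type name below is what a specFile
// would reference in its "type" field once the class has been registered,
// e.g. via registerJacksonSubtype(...) as in MyRealtimeMain above.
@JsonTypeName("myFirehose")
public class MyFirehoseFactory // implements FirehoseFactory (methods omitted)
{
  private final String topic;

  @JsonCreator
  public MyFirehoseFactory(@JsonProperty("topic") String topic)
  {
    this.topic = topic;
  }

  @JsonProperty
  public String getTopic()
  {
    return topic;
  }
}
```
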
@@ -1,5 +1,5 @@

---
layout: doc_page
---
Note: It is recommended that the master console is used to configure rules. However, the master node does have HTTP endpoints to programmatically configure rules.

@@ -12,33 +12,33 @@ Load rules indicate how many replicants of a segment should exist in a server ti

Interval load rules are of the form:

```json
{
  "type" : "loadByInterval",
  "interval" : "2012-01-01/2013-01-01",
  "tier" : "hot"
}
```

* `type` - this should always be "loadByInterval"
* `interval` - A JSON Object representing ISO-8601 Intervals
* `tier` - the configured compute node tier

### Period Load Rule

Period load rules are of the form:

```json
{
  "type" : "loadByPeriod",
  "period" : "P1M",
  "tier" : "hot"
}
```

* `type` - this should always be "loadByPeriod"
* `period` - A JSON Object representing ISO-8601 Periods
* `tier` - the configured compute node tier

The interval of a segment will be compared against the specified period. The rule matches if the period overlaps the interval.

@@ -51,15 +51,15 @@ Drop rules indicate when segments should be dropped from the cluster.

Interval drop rules are of the form:

```json
{
  "type" : "dropByInterval",
  "interval" : "2012-01-01/2013-01-01"
}
```

* `type` - this should always be "dropByInterval"
* `interval` - A JSON Object representing ISO-8601 Intervals

A segment is dropped if the rule's interval contains the interval of the segment.

@@ -67,14 +67,14 @@ A segment is dropped if the interval contains the interval of the segment.

Period drop rules are of the form:

```json
{
  "type" : "dropByPeriod",
  "period" : "P1M"
}
```

* `type` - this should always be "dropByPeriod"
* `period` - A JSON Object representing ISO-8601 Periods

The interval of a segment will be compared against the specified period. The period is from some time in the past to the current time. The rule matches if the period contains the interval.

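The difference between the load-rule and drop-rule period semantics (overlap versus containment) can be illustrated with a small Joda-Time sketch. This is not the master's actual rule code, just a sketch of the comparison described above, with a made-up segment interval:

```java
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.Period;

public class PeriodRuleSketch
{
  public static void main(String[] args)
  {
    DateTime now = new DateTime();

    // "P1M" resolved against the current time: [now - 1 month, now].
    Interval ruleInterval = new Interval(Period.months(1), now);

    // Hypothetical segment covering a single day last week.
    Interval segmentInterval = new Interval(now.minusDays(8), now.minusDays(7));

    // loadByPeriod matches when the period merely overlaps the segment interval.
    boolean loadMatches = ruleInterval.overlaps(segmentInterval);

    // dropByPeriod matches only when the period fully contains the segment interval.
    boolean dropMatches = ruleInterval.contains(segmentInterval);

    System.out.println("loadByPeriod matches: " + loadMatches);
    System.out.println("dropByPeriod matches: " + dropMatches);
  }
}
```
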
@@ -1,5 +1,5 @@

---
layout: doc_page
---
A search query returns dimension values that match the search specification.

@@ -28,14 +28,14 @@ There are several main parts to a search query:

|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be "search"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
|filter|See [Filters](Filters.html)|no|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|searchDimensions|The dimensions to run the search over. Excluding this means the search is run over all dimensions.|no|
|query|See [SearchQuerySpec](SearchQuerySpec.html).|yes|
|sort|How the results of the search should be sorted. Two possible types here are "lexicographic" and "strlen".|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|

The format of the result is:

@@ -1,26 +1,28 @@

---
layout: doc_page
---
Search query specs define how a "match" is defined between a search value and a dimension value. The available search query specs are:

InsensitiveContainsSearchQuerySpec
----------------------------------

If any part of a dimension value contains the value specified in this search query spec, regardless of case, a "match" occurs. The grammar is:

```json
{
  "type" : "insensitive_contains",
  "value" : "some_value"
}
```

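The matching behavior described above amounts to a case-insensitive substring test. The following sketch (not Druid's implementation, just an illustration of the stated semantics) shows the idea:

```java
public class InsensitiveContainsSketch
{
  // Returns true if any part of the dimension value contains the search
  // value, ignoring case, mirroring the description above.
  static boolean matches(String dimensionValue, String searchValue)
  {
    return dimensionValue.toLowerCase().contains(searchValue.toLowerCase());
  }

  public static void main(String[] args)
  {
    System.out.println(matches("Some_Value_Here", "some_value")); // true
    System.out.println(matches("other", "some_value"));           // false
  }
}
```
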
FragmentSearchQuerySpec
-----------------------

If any part of a dimension value contains any of the values specified in this search query spec, regardless of case, a "match" occurs. The grammar is:

```json
{
  "type" : "fragment",
  "values" : ["fragment1", "fragment2"]
}
```

@@ -1,26 +1,28 @@

---
layout: doc_page
---
Segment metadata queries return per segment information about:

* Cardinality of all columns in the segment
* Estimated byte size for the segment columns in TSV format
* Interval the segment covers
* Column type of all the columns in the segment
* Estimated total segment byte size in TSV format
* Segment id

```json
{
  "queryType":"segmentMetadata",
  "dataSource":"sample_datasource",
  "intervals":["2013-01-01/2014-01-01"]
}
```

There are several main parts to a segment metadata query:

|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be "segmentMetadata"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|merge|Merge all individual segment metadata results into a single result|no|

@@ -28,31 +30,16 @@ There are several main parts to a segment metadata query:

The format of the result is:

```json
[ {
  "id" : "some_id",
  "intervals" : [ "2013-05-13T00:00:00.000Z/2013-05-14T00:00:00.000Z" ],
  "columns" : {
    "__time" : { "type" : "LONG", "size" : 407240380, "cardinality" : null },
    "dim1" : { "type" : "STRING", "size" : 100000, "cardinality" : 1944 },
    "dim2" : { "type" : "STRING", "size" : 100000, "cardinality" : 1504 },
    "metric1" : { "type" : "FLOAT", "size" : 100000, "cardinality" : null }
  },
  "size" : 300000
} ]
```

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Segments
========

@@ -14,36 +14,28 @@ Naming Convention

Identifiers for segments are typically constructed using the segment datasource, interval start time (in ISO 8601 format), interval end time (in ISO 8601 format), and a version. If data is additionally sharded beyond a time range, the segment identifier will also contain a partition number.

An example segment identifier may be:

`datasource_intervalStart_intervalEnd_version_partitionNum`

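As a concrete (made-up) illustration of the convention, the pieces might be joined like this; the datasource name, timestamps and version below are hypothetical values, not output from a real cluster:

```java
public class SegmentIdSketch
{
  public static void main(String[] args)
  {
    String id = String.join("_",
        "wikipedia",                  // datasource
        "2013-01-01T00:00:00.000Z",   // interval start (ISO 8601)
        "2013-01-02T00:00:00.000Z",   // interval end (ISO 8601)
        "2013-01-04T00:00:00.000Z",   // version
        "0");                         // partition number (present only if sharded)

    System.out.println(id);
  }
}
```
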
Segment Components
------------------

A segment is comprised of several files, listed below.

* `version.bin`

4 bytes representing the current segment version as an integer. E.g., for v9 segments, the version is 0x0, 0x0, 0x0, 0x9

* `meta.smoosh`

A file with metadata (filenames and offsets) about the contents of the other `smoosh` files

* `XXXXX.smoosh`

There are some number of these files, which are concatenated binary data

The `smoosh` files represent multiple files "smooshed" together in order to minimize the number of file descriptors that must be open to house the data. They are files of up to 2GB in size (to match the limit of a memory-mapped ByteBuffer in Java). The `smoosh` files house individual files for each of the columns in the data as well as an `index.drd` file with extra metadata about the segment.

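The 2GB figure comes from the JDK itself: a `MappedByteBuffer` is indexed by an `int`, so `FileChannel.map` refuses sizes above `Integer.MAX_VALUE`. Here is a minimal sketch of mapping one smoosh-sized file (the file name is a placeholder, and this is not Druid's own I/O code):

```java
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public class SmooshMapSketch
{
  public static void main(String[] args) throws Exception
  {
    try (RandomAccessFile file = new RandomAccessFile("00000.smoosh", "r");
         FileChannel channel = file.getChannel()) {
      long size = channel.size();
      if (size > Integer.MAX_VALUE) {
        // A single mapping cannot exceed ~2GB, hence multiple smoosh files.
        throw new IllegalStateException("file too large to map in one buffer");
      }
      MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, size);
      System.out.println("mapped " + buffer.capacity() + " bytes");
    }
  }
}
```
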
There is also a special column called `__time` that refers to the time column of the segment. This will hopefully become less and less special as the code evolves, but for now it's as special as my Mommy always told me I am.

Format of a column
------------------

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Note: This feature is highly experimental and only works with spatially indexed dimensions.

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Note: This feature is highly experimental.

@@ -1,5 +1,5 @@

---
layout: doc_page
---
This page describes how to use Riak-CS for deep storage instead of S3. We are still setting up some of the peripheral stuff (file downloads, etc.).

@@ -223,4 +223,4 @@ This just walks through getting the relevant software installed and running. Yo

/etc/init.d/druid_master start
/etc/init.d/druid_realtime start
/etc/init.d/druid_broker start
/etc/init.d/druid_compute start

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Numerous backend engineers at [Metamarkets](http://www.metamarkets.com) work on Druid full-time. If you have any questions about usage or code, feel free to contact any of us.

@@ -1,7 +1,7 @@

---
layout: doc_page
---
Tasks are run on workers and always operate on a single datasource. Once an indexer coordinator node accepts a task, a lock is created for the datasource and interval specified in the task. Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may potentially release locks early if they desire. Task ids are made unique by naming them using UUIDs or the timestamp at which the task was created. Tasks are also part of a "task group", which is a set of tasks that can share interval locks.

There are several different types of tasks.

@@ -1,6 +1,7 @@

---
layout: doc_page
---

YourKit supports the Druid open source projects with its
full-featured Java Profiler.
YourKit, LLC is the creator of innovative and intelligent tools for profiling

@@ -8,4 +9,4 @@ Java and .NET applications. Take a look at YourKit's software products:

<a href="http://www.yourkit.com/java/profiler/index.jsp">YourKit Java
Profiler</a> and
<a href="http://www.yourkit.com/.net/profiler/index.jsp">YourKit .NET
Profiler</a>.

@@ -1,29 +1,31 @@

---
layout: doc_page
---
Time boundary queries return the earliest and latest data points of a data set. The grammar is:

```json
{
  "queryType" : "timeBoundary",
  "dataSource": "sample_datasource"
}
```

There are 3 main parts to a time boundary query:

|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be "timeBoundary"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|

The format of the result is:

```json
[ {
  "timestamp" : "2013-05-09T18:24:00.000Z",
  "result" : {
    "minTime" : "2013-05-09T18:24:00.000Z",
    "maxTime" : "2013-05-09T18:37:00.000Z"
  }
} ]
```

@@ -1,5 +1,5 @@

---
layout: doc_page
---
Timeseries queries
==================

@@ -8,81 +8,46 @@ These types of queries take a timeseries query object and return an array of JSO

An example timeseries query object is shown below:

```json
{
  "queryType": "timeseries",
  "dataSource": "sample_datasource",
  "granularity": "day",
  "filter": {
    "type": "and",
    "fields": [
      { "type": "selector", "dimension": "sample_dimension1", "value": "sample_value1" },
      { "type": "or",
        "fields": [
          { "type": "selector", "dimension": "sample_dimension2", "value": "sample_value2" },
          { "type": "selector", "dimension": "sample_dimension3", "value": "sample_value3" }
        ]
      }
    ]
  },
  "aggregations": [
    { "type": "longSum", "name": "sample_name1", "fieldName": "sample_fieldName1" },
    { "type": "doubleSum", "name": "sample_name2", "fieldName": "sample_fieldName2" }
  ],
  "postAggregations": [
    { "type": "arithmetic",
      "name": "sample_divide",
      "fn": "/",
      "fields": [
        { "type": "fieldAccess", "name": "sample_name1", "fieldName": "sample_fieldName1" },
        { "type": "fieldAccess", "name": "sample_name2", "fieldName": "sample_fieldName2" }
      ]
    }
  ],
  "intervals": [ "2012-01-01T00:00:00.000/2012-01-03T00:00:00.000" ]
}
```

There are 7 main parts to a timeseries query:

|property|description|required?|
|--------|-----------|---------|
|queryType|This String should always be "timeseries"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|granularity|Defines the granularity of the query. See [Granularities](Granularities.html)|yes|
|filter|See [Filters](Filters.html)|no|

@@ -91,28 +56,17 @@ There are 7 main parts to a timeseries query:

|intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
|context|An additional JSON Object which can be used to specify certain flags.|no|

To pull it all together, the above query would return 2 data points, one for each day between 2012-01-01 and 2012-01-03, from the "sample_datasource" table. Each data point would be the (long) sum of sample_fieldName1, the (double) sum of sample_fieldName2 and the (double) result of sample_fieldName1 divided by sample_fieldName2 for the filter set. The output looks like this:

```json
[
  {
    "timestamp": "2012-01-01T00:00:00.000Z",
    "result": { "sample_name1": <some_value>, "sample_name2": <some_value>, "sample_divide": <some_value> }
  },
  {
    "timestamp": "2012-01-02T00:00:00.000Z",
    "result": { "sample_name1": <some_value>, "sample_name2": <some_value>, "sample_divide": <some_value> }
  }
]
```

|
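If you want to run this example directly, a minimal sketch with curl is shown below. The file name `timeseries_query.json` and the `localhost:8083` endpoint are hypothetical; point the command at whatever file and queryable Druid node you actually have.

```bash
# Hypothetical file name and endpoint; adjust to match your own setup.
# Sends the timeseries query above and pretty-prints the JSON response.
curl -X POST 'http://localhost:8083/druid/v2/?pretty' \
  -H 'content-type: application/json' \
  -d @timeseries_query.json
```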
@@ -1,40 +1,42 @@

---
layout: doc_page
---
Greetings! This tutorial will help clarify some core Druid concepts. We will use a realtime dataset and issue some basic Druid queries. If you are ready to explore Druid, and learn a thing or two, read on!

About the data
--------------

The data source we'll be working with is Wikipedia edits. Each time an edit is made in Wikipedia, an event gets pushed to an IRC channel associated with the language of the Wikipedia page. We scrape IRC channels for several different languages and load this data into Druid.

Each event has a timestamp indicating the time of the edit (in UTC time), a list of dimensions indicating various metadata about the event (such as information about the user editing the page and where the user resides), and a list of metrics associated with the event (such as the number of characters added and deleted).

Specifically, the data schema looks like so:

Dimensions (things to filter on):

```json
"page"
"language"
"user"
"unpatrolled"
"newPage"
"robot"
"anonymous"
"namespace"
"continent"
"country"
"region"
"city"
```

Metrics (things to aggregate over):

```json
"count"
"added"
"delta"
"deleted"
```

These metrics track the number of characters added, deleted, and changed.

@@ -45,120 +47,120 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu

### Download a Tarball

We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.54-bin.tar.gz).
Download this file to a directory of your choosing.

You can extract the awesomeness within by issuing:

```
tar -zxvf druid-services-*-bin.tar.gz
```

Not too lost so far right? That's great! If you cd into the directory:

```
cd druid-services-0.5.54
```

You should see a bunch of files:
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories

Running Example Scripts
-----------------------

Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:

```
./run_example_server.sh
```

Select "wikipedia".

Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.

```
2013-07-19 21:54:05,154 INFO [main] com.metamx.druid.realtime.RealtimeNode - Starting Jetty
2013-07-19 21:54:05,154 INFO [main] org.mortbay.log - jetty-6.1.x
2013-07-19 21:54:05,171 INFO [chief-wikipedia] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Expect to run at [2013-07-19T22:03:00.000Z]
2013-07-19 21:54:05,246 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8083
```

The Druid real-time node ingests events in an in-memory buffer. Periodically, these events will be persisted to disk. If you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our [White Paper](http://static.druid.io/docs/druid.pdf).

Okay, things are about to get real-time. To query the real-time node you've spun up, you can issue:

```
./run_example_client.sh
```

Select "wikipedia" once again. This script issues [GroupByQuery](GroupByQuery.html)s to the data we've been ingesting. The query looks like this:

```json
{
  "queryType": "groupBy",
  "dataSource": "wikipedia",
  "granularity": "minute",
  "dimensions": [ "page" ],
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "longSum", "fieldName": "edit_count", "name": "count" }
  ],
  "filter": { "type": "selector", "dimension": "namespace", "value": "article" },
  "intervals": [ "2013-06-01T00:00/2020-01-01T00" ]
}
```

This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the `dimensions` field: `["page"]`. We are **filtering** via the `namespace` dimension, to only look at edits on `articles`. Our **aggregations** are what we are calculating: a count of the number of data rows, and a count of the number of edits that have occurred.
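If you would rather issue this groupBy query yourself instead of going through `run_example_client.sh`, here is a minimal sketch. It assumes you save the JSON above to a hypothetical file named `wikipedia_group_by.body`, and that the realtime node is listening on `localhost:8083` as in the logs above.

```bash
# Hypothetical file name; the endpoint matches the realtime node started above.
curl -X POST 'http://localhost:8083/druid/v2/?pretty' \
  -H 'content-type: application/json' \
  -d @wikipedia_group_by.body
```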
The result looks something like this:

```json
[
  {
    "version": "v1",
    "timestamp": "2013-09-04T21:44:00.000Z",
    "event": { "count": 0, "page": "2013\u201314_Brentford_F.C._season", "rows": 1 }
  },
  {
    "version": "v1",
    "timestamp": "2013-09-04T21:44:00.000Z",
    "event": { "count": 0, "page": "8e_\u00e9tape_du_Tour_de_France_2013", "rows": 1 }
  },
  {
    "version": "v1",
    "timestamp": "2013-09-04T21:44:00.000Z",
    "event": { "count": 0, "page": "Agenda_of_the_Tea_Party_movement", "rows": 1 }
  },
  ...
]
```

This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.

Querying Druid
--------------

In your favorite editor, create the file:

```
time_boundary_query.body
```

Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:

```
{
  "queryType": "timeBoundary",
  "dataSource": "wikipedia"
}
```

The [TimeBoundaryQuery](TimeBoundaryQuery.html) is one of the simplest Druid queries. To run the query, you can issue:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
```

We get something like this JSON back:

@@ -171,186 +173,146 @@ We get something like this JSON back:

  }
} ]
```

As you can probably tell, the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity). Let's explore a bit further.

Return to your favorite editor and create the file:

```
timeseries_query.body
```

We are going to make a slightly more complicated query, the [TimeseriesQuery](TimeseriesQuery.html). Copy and paste the following into the file:

```
{
  "queryType": "timeseries",
  "dataSource": "wikipedia",
  "intervals": [ "2010-01-01/2020-01-01" ],
  "granularity": "all",
  "aggregations": [
    { "type": "longSum", "fieldName": "count", "name": "edit_count" },
    { "type": "doubleSum", "fieldName": "added", "name": "chars_added" }
  ]
}
```

You are probably wondering, what are these [Granularities](Granularities.html) and [Aggregations](Aggregations.html) things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body
```

Once again, you should get a JSON blob of text back with your results, that looks something like this:

```json
[ {
  "timestamp" : "2013-09-04T21:44:00.000Z",
  "result" : { "chars_added" : 312670.0, "edit_count" : 733 }
} ]
```

If you issue the query again, you should notice your results updating.
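Because the realtime node keeps ingesting while you experiment, one way to watch the numbers move is to re-issue the query on a loop. A small sketch, reusing the same `timeseries_query.body` file and endpoint as above:

```bash
# Re-run the timeseries query every 10 seconds to watch the aggregates grow.
# Press Control+C to stop.
while true; do
  curl -s -X POST 'http://localhost:8083/druid/v2/?pretty' \
    -H 'content-type: application/json' \
    -d @timeseries_query.body
  sleep 10
done
```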

Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?

If you loudly exclaimed "we can change granularity to minute", you are absolutely correct! We can specify different granularities to bucket our results, like so:

```
{
  "queryType": "timeseries",
  "dataSource": "wikipedia",
  "intervals": [ "2010-01-01/2020-01-01" ],
  "granularity": "minute",
  "aggregations": [
    { "type": "longSum", "fieldName": "count", "name": "edit_count" },
    { "type": "doubleSum", "fieldName": "added", "name": "chars_added" }
  ]
}
```

This gives us something like the following:

```json
[
  {
    "timestamp" : "2013-09-04T21:44:00.000Z",
    "result" : { "chars_added" : 30665.0, "edit_count" : 128 }
  },
  {
    "timestamp" : "2013-09-04T21:45:00.000Z",
    "result" : { "chars_added" : 122637.0, "edit_count" : 167 }
  },
  {
    "timestamp" : "2013-09-04T21:46:00.000Z",
    "result" : { "chars_added" : 78938.0, "edit_count" : 159 }
  },
  ...
]
```

Solving a Problem
-----------------

One of Druid's main powers is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top pages in the US are, ordered by the number of edits over the last few minutes you've been going through this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group results by dimension value and somehow sort those results... and it turns out we can!

Let's create the file:

```
group_by_query.body
```

and put the following in there:

```
{
  "queryType": "groupBy",
  "dataSource": "wikipedia",
  "granularity": "all",
  "dimensions": [ "page" ],
  "orderBy": {
    "type": "default",
    "columns": [ { "dimension": "edit_count", "direction": "DESCENDING" } ],
    "limit": 10
  },
  "aggregations": [
    { "type": "longSum", "fieldName": "count", "name": "edit_count" }
  ],
  "filter": { "type": "selector", "dimension": "country", "value": "United States" },
  "intervals": [ "2012-10-01T00:00/2020-01-01T00" ]
}
```

Woah! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.

If you issue the query:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body
```

You should see an answer to our question. As an example, some results are shown below:

```json
[
  {
    "version" : "v1",
    "timestamp" : "2012-10-01T00:00:00.000Z",
    "event" : { "page" : "RTC_Transit", "edit_count" : 6 }
  },
  {
    "version" : "v1",
    "timestamp" : "2012-10-01T00:00:00.000Z",
    "event" : { "page" : "List_of_Deadly_Women_episodes", "edit_count" : 4 }
  },
  {
    "version" : "v1",
    "timestamp" : "2012-10-01T00:00:00.000Z",
    "event" : { "page" : "User_talk:David_Biddulph", "edit_count" : 4 }
  },
  ...
]
```

Feel free to tweak other query parameters to answer other questions you may have about the data.
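As one hypothetical tweak (not part of the original tutorial), you could rank pages by raw row count instead of edit_count and narrow the interval. A sketch that reuses the query shape above, against the same endpoint:

```bash
# Hypothetical variation: order by the "rows" aggregator and shorten the interval.
curl -X POST 'http://localhost:8083/druid/v2/?pretty' \
  -H 'content-type: application/json' \
  -d '{
    "queryType": "groupBy",
    "dataSource": "wikipedia",
    "granularity": "all",
    "dimensions": [ "page" ],
    "orderBy": {
      "type": "default",
      "columns": [ { "dimension": "rows", "direction": "DESCENDING" } ],
      "limit": 10
    },
    "aggregations": [
      { "type": "count", "name": "rows" },
      { "type": "longSum", "fieldName": "count", "name": "edit_count" }
    ],
    "filter": { "type": "selector", "dimension": "country", "value": "United States" },
    "intervals": [ "2013-09-01T00:00/2020-01-01T00" ]
  }'
```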
@@ -1,5 +1,5 @@

---
layout: doc_page
---
Welcome back! In our first [tutorial](https://github.com/metamx/druid/wiki/Tutorial%3A-A-First-Look-at-Druid), we introduced you to the most basic Druid setup: a single realtime node. We streamed in some data and queried it. Realtime nodes collect very recent data and periodically hand that data off to the rest of the Druid cluster. Some questions about the architecture must naturally come to mind. What does the rest of the Druid cluster look like? How does Druid load available static data?

@@ -14,6 +14,7 @@ If you followed the first tutorial, you should already have Druid downloaded. If

You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.5.54-bin.tar.gz)

and untar the contents within by issuing:

```bash
tar -zxvf druid-services-*-bin.tar.gz
cd druid-services-*

@@ -32,15 +33,18 @@ For deep storage, we have made a public S3 bucket (static.druid.io) available wh

1. If you don't already have it, download MySQL Community Server here: [http://dev.mysql.com/downloads/mysql/](http://dev.mysql.com/downloads/mysql/)
2. Install MySQL
3. Create a druid user and database

```bash
mysql -u root
```

```sql
GRANT ALL ON druid.* TO 'druid'@'localhost' IDENTIFIED BY 'diurd';
CREATE database druid;
```

### Setting up Zookeeper ###

```bash
curl http://www.motorlogy.com/apache/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
tar xzf zookeeper-3.4.5.tar.gz

@@ -55,6 +59,7 @@ cd ..

Similar to the first tutorial, the data we will be loading is based on edits that have occurred on Wikipedia. Every time someone edits a page in Wikipedia, metadata is generated about the editor and edited page. Druid collects each individual event and packages them together in a container known as a [segment](https://github.com/metamx/druid/wiki/Segments). Segments contain data over some span of time. We've prebuilt a segment for this tutorial and will cover making your own segments in other [pages](https://github.com/metamx/druid/wiki/Loading-Your-Data). The segment we are going to work with has the following format:

Dimensions (things to filter on):

```json
"page"
"language"

@@ -71,6 +76,7 @@ Dimensions (things to filter on):

```

Metrics (things to aggregate over):

```json
"count"
"added"

@@ -98,7 +104,7 @@ To create the master config file:

mkdir config/master
```

Under the directory we just created, create the file `runtime.properties` with the following contents:

```
druid.host=127.0.0.1:8082

@@ -146,7 +152,8 @@ To create the compute config file:

mkdir config/compute
```

Under the directory we just created, create the file `runtime.properties` with the following contents:

```
druid.host=127.0.0.1:8081
druid.port=8081

@@ -219,67 +226,17 @@ To start the broker node:

```bash
java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath lib/*:config/broker com.metamx.druid.http.BrokerMain
```

## Loading the Data ##

The MySQL dependency we introduced earlier on contains a 'segments' table that contains entries for segments that should be loaded into our cluster. The Druid master compares this table with segments that already exist in the cluster to determine what should be loaded and dropped. To load our wikipedia segment, we need to create an entry in our MySQL segment table.

Usually, when new segments are created, these MySQL entries are created directly so you never have to do this by hand. For this tutorial, we can do this manually by going back into MySQL and issuing:

```sql
use druid;

INSERT INTO segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}');
```
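Before watching the master logs, you can sanity-check that the row really landed. A quick sketch from the shell, assuming the druid user and the diurd password created earlier:

```bash
# List the segment entries the master will see (credentials from the GRANT step above).
mysql -u druid -pdiurd druid -e "SELECT id, dataSource, used FROM segments;"
```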
If you look in your master node logs, you should, after a maximum of a minute or so, see logs of the following form:

@@ -294,9 +251,9 @@ When the segment completes downloading and ready for queries, you should see the

2013-08-08 22:48:41,959 INFO [ZkCoordinator-0] com.metamx.druid.coordination.BatchDataSegmentAnnouncer - Announcing segment[wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z] at path[/druid/segments/127.0.0.1:8081/2013-08-08T22:48:41.959Z]
```

At this point, we can query the segment. For more information on querying, see this [link](https://github.com/metamx/druid/wiki/Querying).

## Next Steps ##

Now that you have an understanding of what the Druid cluster looks like, why not load some of your own data?
Check out the [Loading Your Own Data](https://github.com/metamx/druid/wiki/Loading-Your-Data) section for more info!
@@ -1,345 +1,307 @@

---
layout: doc_page
---
Greetings! This tutorial will help clarify some core Druid concepts. We will use a realtime dataset and issue some basic Druid queries. If you are ready to explore Druid, and learn a thing or two, read on!

About the data
--------------

The data source we'll be working with is the Bit.ly USA Government website statistics stream. You can see the stream [here](http://developer.usa.gov/1usagov), and read about the stream [here](http://www.usa.gov/About/developer-resources/1usagov.shtml). This is a feed of JSON data that gets updated whenever anyone clicks a bit.ly shortened USA.gov website. A typical event might look something like this:

```json
{
  "user_agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
  "country": "US",
  "known_user": 1,
  "timezone": "America/New_York",
  "geo_region": "DC",
  "global_bitly_hash": "17ctAFs",
  "encoding_user_bitly_hash": "17ctAFr",
  "encoding_user_login": "senrubiopress",
  "aaccept_language": "en-US",
  "short_url_cname": "1.usa.gov",
  "referring_url": "http://t.co/4Av4NUFAYq",
  "long_url": "http://www.rubio.senate.gov/public/index.cfm/fighting-for-florida?ID=c8357d12-9da8-4e9d-b00d-7168e1bf3599",
  "timestamp": 1372190407,
  "timestamp of time hash was created": 1372190097,
  "city": "Washington",
  "latitude_longitude": [ 38.893299, -77.014603 ]
}
```

The "known_user" field is always 1 or 0. It is 1 if the user is known to the server, and 0 otherwise. We will use this field extensively in this demo.

Setting Up
----------

There are two ways to set up Druid: download a tarball, or [Build From Source](Build-From-Source.html). You only need to do one of these.

### Download a Tarball

We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.5.50-bin.tar.gz).
Download this file to a directory of your choosing.
You can extract the awesomeness within by issuing:

```
tar zxvf druid-services-*-bin.tar.gz
```

Not too lost so far right? That's great! If you cd into the directory:

```
cd druid-services-0.5.50
```

You should see a bunch of files:
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories

Running Example Scripts
-----------------------

Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:

```
./run_example_server.sh
```

Select "webstream".
Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.

```
2013-07-19 21:54:05,154 INFO com.metamx.druid.realtime.RealtimeNode - Starting Jetty
2013-07-19 21:54:05,154 INFO org.mortbay.log - jetty-6.1.x
2013-07-19 21:54:05,171 INFO com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Expect to run at
2013-07-19 21:54:05,246 INFO org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8083
```

The Druid real-time node ingests events in an in-memory buffer. Periodically, these events will be persisted to disk. If you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our [White Paper](http://static.druid.io/docs/druid.pdf).
Okay, things are about to get real. To query the real-time node you've spun up, you can issue:

```
./run_example_client.sh
```

Select "webstream" once again. This script issues [GroupByQuery](GroupByQuery.html)s to the data we've been ingesting. The query looks like this:

```json
{
  "queryType": "groupBy",
  "dataSource": "webstream",
  "granularity": "minute",
  "dimensions": [ "timezone" ],
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
  ],
  "filter": { "type": "selector", "dimension": "country", "value": "US" },
  "intervals": [ "2013-06-01T00:00/2020-01-01T00" ]
}
```

This is a `groupBy` query, which you may be familiar with from SQL. We are grouping, or aggregating, via the `dimensions` field: `["timezone"]`. We are **filtering** via the `"country"` dimension, to only look at website hits in the US. Our **aggregations** are what we are calculating: a row count, and the sum of the number of known users in our data.

The result looks something like this:

```json
[
  {
    "version": "v1",
    "timestamp": "2013-07-18T19:39:00.000Z",
    "event": { "timezone": "America/Chicago", "known_users": 10, "rows": 15 }
  },
  {
    "version": "v1",
    "timestamp": "2013-07-18T19:39:00.000Z",
    "event": { "timezone": "America/Los_Angeles", "known_users": 0, "rows": 3 }
  },
  ...
]
```

This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have [curl](http://curl.haxx.se/) installed. Control+C to break out of the client script.

Querying Druid
--------------

In your favorite editor, create the file:

```
time_boundary_query.body
```

Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:

```
{
  "queryType": "timeBoundary",
  "dataSource": "webstream"
}
```

The [TimeBoundaryQuery](TimeBoundaryQuery.html) is one of the simplest Druid queries. To run the query, you can issue:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
```

We get something like this JSON back:

```json
[
  {
    "timestamp": "2013-07-18T19:39:00.000Z",
    "result": {
      "minTime": "2013-07-18T19:39:00.000Z",
      "maxTime": "2013-07-18T19:46:00.000Z"
    }
  }
]
```
As you can probably tell, the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity). Let's explore a bit further.
Return to your favorite editor and create the file:

```
timeseries_query.body
```

We are going to make a slightly more complicated query, the [TimeseriesQuery](TimeseriesQuery.html). Copy and paste the following into the file:

```json
{
  "queryType": "timeseries",
  "dataSource": "webstream",
  "intervals": [ "2010-01-01/2020-01-01" ],
  "granularity": "all",
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
  ]
}
```

You are probably wondering, what are these [Granularities](Granularities.html) and [Aggregations](Aggregations.html) things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body
```

Once again, you should get a JSON blob of text back with your results, that looks something like this:

```json
[
  {
    "timestamp" : "2013-07-18T19:39:00.000Z",
    "result" : { "known_users" : 787.0, "rows" : 2004 }
  }
]
```

If you issue the query again, you should notice your results updating.
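As a hypothetical follow-up (not part of the original tutorial), the minTime/maxTime pair returned by the earlier timeBoundary query can be dropped straight into the intervals field of a later query, so you only scan the window that actually contains data. This sketch reuses the timestamps from the sample timeBoundary response above; substitute your own.

```bash
# Hypothetical follow-up: restrict the timeseries query to the window reported
# by the timeBoundary result shown earlier.
curl -X POST 'http://localhost:8083/druid/v2/?pretty' \
  -H 'content-type: application/json' \
  -d '{
    "queryType": "timeseries",
    "dataSource": "webstream",
    "intervals": [ "2013-07-18T19:39:00.000Z/2013-07-18T19:46:00.000Z" ],
    "granularity": "all",
    "aggregations": [
      { "type": "count", "name": "rows" },
      { "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
    ]
  }'
```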
|
|
||||||
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
|
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?
|
||||||
|
|
||||||
If you loudly exclaimed “we can change granularity to minute”, you are absolutely correct! We can specify different granularities to bucket our results, like so:
|
If you loudly exclaimed "we can change granularity to minute", you are absolutely correct! We can specify different granularities to bucket our results, like so:
|
||||||
|
|
||||||
```json
{
  "queryType": "timeseries",
  "dataSource": "webstream",
  "intervals": [ "2010-01-01/2020-01-01" ],
  "granularity": "minute",
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
  ]
}
```

This gives us something like the following:

```json
[
  {
    "timestamp": "2013-07-18T19:39:00.000Z",
    "result": { "known_users": 33, "rows": 76 }
  },
  {
    "timestamp": "2013-07-18T19:40:00.000Z",
    "result": { "known_users": 105, "rows": 221 }
  },
  {
    "timestamp": "2013-07-18T19:41:00.000Z",
    "result": { "known_users": 53, "rows": 167 }
  },
  ...
```

Solving a Problem
-----------------

One of Druid's main powers is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top states in the US are, ordered by the number of visits by known users over the last few minutes? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the [GroupByQuery](GroupByQuery.html). It would be nice if we could group results by dimension value and somehow sort those results… and it turns out we can!

Let's create the file:

```
group_by_query.body
```

and put the following in there:

```
{
  "queryType": "groupBy",
  "dataSource": "webstream",
  "granularity": "all",
  "dimensions": [ "geo_region" ],
  "orderBy": {
    "type": "default",
    "columns": [
      { "dimension": "known_users", "direction": "DESCENDING" }
    ],
    "limit": 10
  },
  "aggregations": [
    { "type": "count", "name": "rows" },
    { "type": "doubleSum", "fieldName": "known_users", "name": "known_users" }
  ],
  "filter": { "type": "selector", "dimension": "country", "value": "US" },
  "intervals": [ "2012-10-01T00:00/2020-01-01T00" ]
}
```

Woah! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.

If you issue the query:

```
curl -X POST 'http://localhost:8083/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body
```

You should see an answer to our question. For my stream, it looks like this:

```json
[
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": { "geo_region": "RI", "known_users": 359, "rows": 143 }
  },
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": { "geo_region": "NY", "known_users": 187, "rows": 322 }
  },
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": { "geo_region": "CA", "known_users": 145, "rows": 466 }
  },
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": { "geo_region": "IL", "known_users": 121, "rows": 185 }
  },
  ...
```

Feel free to tweak other query parameters to answer other questions you may have about the data.

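For example, to rank the same states by raw row count instead and keep only the top three, you could change just the `orderBy` block to something like:

```
"orderBy": {
  "type": "default",
  "columns": [
    { "dimension": "rows", "direction": "DESCENDING" }
  ],
  "limit": 3
}
```
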
@ -1,329 +0,0 @@
@ -0,0 +1,327 @@
---
layout: doc_page
---
Greetings! We see you've taken an interest in Druid. That's awesome! Hopefully this tutorial will help clarify some core Druid concepts. We will go through one of the Real-time "Examples":Examples.html, and issue some basic Druid queries. The data source we'll be working with is the "Twitter spritzer stream":https://dev.twitter.com/docs/streaming-apis/streams/public. If you are ready to explore Druid, brave its challenges, and maybe learn a thing or two, read on!

h2. Setting Up

There are two ways to set up Druid: download a tarball, or build it from source.

h3. Download a Tarball

We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/data/examples/druid-services-0.4.6.tar.gz.
Download this bad boy to a directory of your choosing.

You can extract the awesomeness within by issuing:

pre. tar -zxvf druid-services-0.4.6.tar.gz

Not too lost so far right? That's great! If you cd into the directory:

pre. cd druid-services-0.4.6-SNAPSHOT

You should see a bunch of files:
* run_example_server.sh
* run_example_client.sh
* LICENSE, config, examples, lib directories

h3. Clone and Build from Source

The other way to set up Druid is from source via git. To do so, run these commands:

<pre><code>git clone git@github.com:metamx/druid.git
cd druid
git checkout druid-0.4.32-branch
./build.sh
</code></pre>

You should see a bunch of files:

<pre><code>DruidCorporateCLA.pdf README common examples indexer pom.xml server
DruidIndividualCLA.pdf build.sh doc group_by.body install publications services
LICENSE client eclipse_formatting.xml index-common merger realtime
</code></pre>

You can find the example executables in the examples/bin directory:
* run_example_server.sh
* run_example_client.sh

h2. Running Example Scripts

Let's start doing stuff. You can start a Druid "Realtime":Realtime.html node by issuing:
<code>./run_example_server.sh</code>

Select "twitter".

You'll need to register a new application with the Twitter API, which only takes a minute. Go to "https://twitter.com/oauth_clients/new":https://twitter.com/oauth_clients/new and fill out the form and submit. Don't worry, the home page and callback URL can be anything. This will generate keys for the Twitter example application. Take note of the values for consumer key/secret and access token/secret.

Enter your credentials when prompted.

Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below. If you see crazy exceptions, you probably typed in your login information incorrectly.
<pre><code>2013-05-17 23:04:40,934 INFO [main] org.mortbay.log - Started SelectChannelConnector@0.0.0.0:8080
2013-05-17 23:04:40,935 INFO [main] com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler - Invoking start method[public void com.metamx.druid.http.FileRequestLogger.start()] on object[com.metamx.druid.http.FileRequestLogger@42bb0406].
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Connection established.
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] druid.examples.twitter.TwitterSpritzerFirehoseFactory - Connected_to_Twitter
2013-05-17 23:04:41,578 INFO [Twitter Stream consumer-1[Establishing connection]] twitter4j.TwitterStreamImpl - Receiving status stream.
</code></pre>

Periodically, you'll also see messages of the form:
<pre><code>2013-05-17 23:04:59,793 INFO [chief-twitterstream] druid.examples.twitter.TwitterSpritzerFirehoseFactory - nextRow() has returned 1,000 InputRows
</code></pre>

These messages indicate you are ingesting events. The Druid real-time node ingests events in an in-memory buffer. Periodically, these events will be persisted to disk. Persisting to disk generates a whole bunch of logs:

<pre><code>2013-05-17 23:06:40,918 INFO [chief-twitterstream] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - Submitting persist runnable for dataSource[twitterstream]
2013-05-17 23:06:40,920 INFO [twitterstream-incremental-persist] com.metamx.druid.realtime.plumber.RealtimePlumberSchool - DataSource[twitterstream], Interval[2013-05-17T23:00:00.000Z/2013-05-18T00:00:00.000Z], persisting Hydrant[FireHydrant{index=com.metamx.druid.index.v1.IncrementalIndex@126212dd, queryable=com.metamx.druid.index.IncrementalIndexSegment@64c47498, count=0}]
2013-05-17 23:06:40,937 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting persist for interval[2013-05-17T23:00:00.000Z/2013-05-17T23:07:00.000Z], rows[4,666]
2013-05-17 23:06:41,039 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed index.drd in 11 millis.
2013-05-17 23:06:41,070 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed dim conversions in 31 millis.
2013-05-17 23:06:41,275 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.CompressedPools - Allocating new chunkEncoder[1]
2013-05-17 23:06:41,332 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed walk through of 4,666 rows in 262 millis.
2013-05-17 23:06:41,334 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[htags] with cardinality[634]
2013-05-17 23:06:41,381 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[htags] in 49 millis.
2013-05-17 23:06:41,382 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[lang] with cardinality[19]
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[lang] in 17 millis.
2013-05-17 23:06:41,398 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Starting dimension[utc_offset] with cardinality[32]
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - Completed dimension[utc_offset] in 15 millis.
2013-05-17 23:06:41,413 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexMerger - outDir[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] completed inverted.drd in 81 millis.
2013-05-17 23:06:41,425 INFO [twitterstream-incremental-persist] com.metamx.druid.index.v1.IndexIO$DefaultIndexIOHandler - Converting v8[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0/v8-tmp] to v9[/tmp/example/twitter_realtime/basePersist/twitterstream/2013-05-17T23:00:00.000Z_2013-05-18T00:00:00.000Z/0]
2013-05-17 23:06:41,426 INFO [twitterstream-incremental-persist]
... ETC
</code></pre>

The logs are about building different columns, probably not the most exciting stuff (they might as well be in Vulcan) if you are learning about Druid for the first time. Nevertheless, if you are interested in the details of our real-time architecture and why we persist indexes to disk, I suggest you read our "White Paper":http://static.druid.io/docs/druid.pdf.

Okay, things are about to get real (-time). To query the real-time node you've spun up, you can issue:
<pre>./run_example_client.sh</pre>

Select "twitter" once again. This script issues ["GroupByQuery":GroupByQuery.html]s to the Twitter data we've been ingesting. The query looks like this:

<pre><code>{
  "queryType": "groupBy",
  "dataSource": "twitterstream",
  "granularity": "all",
  "dimensions": ["lang", "utc_offset"],
  "aggregations":[
    { "type": "count", "name": "rows"},
    { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
  ],
  "filter": { "type": "selector", "dimension": "lang", "value": "en" },
  "intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code></pre>

This is a **groupBy** query, which you may be familiar with from SQL. We are grouping, or aggregating, via the **dimensions** field: ["lang", "utc_offset"]. We are **filtering** via the **"lang"** dimension, to only look at English tweets. Our **aggregations** are what we are calculating: a row count, and the sum of the tweets in our data.

The result looks something like this:

<pre><code>[
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": {
      "utc_offset": "-10800",
      "tweets": 90,
      "lang": "en",
      "rows": 81
    }
  },
  {
    "version": "v1",
    "timestamp": "2012-10-01T00:00:00.000Z",
    "event": {
      "utc_offset": "-14400",
      "tweets": 177,
      "lang": "en",
      "rows": 154
    }
  },
  ...
</code></pre>

This data, plotted in a time series/distribution, looks something like this:

!http://metamarkets.com/wp-content/uploads/2013/06/tweets_timezone_offset.png(Timezone / Tweets Scatter Plot)!

This groupBy query is a bit complicated and we'll return to it later. For the time being, just make sure you are getting some blocks of data back. If you are having problems, make sure you have "curl":http://curl.haxx.se/ installed. Control+C to break out of the client script.

h2. Querying Druid

In your favorite editor, create the file:
<pre>time_boundary_query.body</pre>

Druid queries are JSON blobs which are relatively painless to create programmatically, but an absolute pain to write by hand. So anyway, we are going to create a Druid query by hand. Add the following to the file you just created:
<pre><code>{
  "queryType" : "timeBoundary",
  "dataSource" : "twitterstream"
}
</code></pre>
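If you would rather create the file without leaving your terminal, a shell heredoc works just as well (assuming a POSIX-style shell; the file name and query body are exactly the ones above):

<pre><code>cat > time_boundary_query.body <<'EOF'
{
  "queryType" : "timeBoundary",
  "dataSource" : "twitterstream"
}
EOF
</code></pre>
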
The "TimeBoundaryQuery":TimeBoundaryQuery.html is one of the simplest Druid queries. To run the query, you can issue:
<pre><code>
curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @time_boundary_query.body
</code></pre>

We get something like this JSON back:

<pre><code>[ {
  "timestamp" : "2013-06-10T19:09:00.000Z",
  "result" : {
    "minTime" : "2013-06-10T19:09:00.000Z",
    "maxTime" : "2013-06-10T20:50:00.000Z"
  }
} ]
</code></pre>

That's the result. What information do you think the result is conveying?
...
If you said the result is indicating the maximum and minimum timestamps we've seen thus far (summarized to a minutely granularity), you are absolutely correct. I can see you are a person legitimately interested in learning about Druid. Let's explore a bit further.

Return to your favorite editor and create the file:

<pre>timeseries_query.body</pre>

We are going to make a slightly more complicated query, the "TimeseriesQuery":TimeseriesQuery.html. Copy and paste the following into the file:

<pre><code>{
  "queryType":"timeseries",
  "dataSource":"twitterstream",
  "intervals":["2010-01-01/2020-01-01"],
  "granularity":"all",
  "aggregations":[
    { "type": "count", "name": "rows"},
    { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
  ]
}
</code></pre>

You are probably wondering, what are these "Granularities":Granularities.html and "Aggregations":Aggregations.html things? What the query is doing is aggregating some metrics over some span of time.
To issue the query and get some results, run the following in your command line:
<pre><code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @timeseries_query.body</code></pre>

Once again, you should get a JSON blob of text back with your results, that looks something like this:

<pre><code>[ {
  "timestamp" : "2013-06-10T19:09:00.000Z",
  "result" : {
    "tweets" : 358562.0,
    "rows" : 272271
  }
} ]
</code></pre>

If you issue the query again, you should notice your results updating.

Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? What field can we change in the query to accomplish this?

If you loudly exclaimed "we can change granularity to minute", you are absolutely correct again! We can specify different granularities to bucket our results, like so:

<pre><code>{
  "queryType":"timeseries",
  "dataSource":"twitterstream",
  "intervals":["2010-01-01/2020-01-01"],
  "granularity":"minute",
  "aggregations":[
    { "type": "count", "name": "rows"},
    { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}
  ]
}
</code></pre>

This gives us something like the following:

<pre><code>[ {
  "timestamp" : "2013-06-10T19:09:00.000Z",
  "result" : {
    "tweets" : 2650.0,
    "rows" : 2120
  }
}, {
  "timestamp" : "2013-06-10T19:10:00.000Z",
  "result" : {
    "tweets" : 3401.0,
    "rows" : 2609
  }
}, {
  "timestamp" : "2013-06-10T19:11:00.000Z",
  "result" : {
    "tweets" : 3472.0,
    "rows" : 2610
  }
},
...
</code></pre>

h2. Solving a Problem

One of Druid's main powers (see what we did there?) is to provide answers to problems, so let's pose a problem. What if we wanted to know what the top hash tags are, ordered by the number of tweets, where the language is English, over the last few minutes you've been reading this tutorial? To solve this problem, we have to return to the query we introduced at the very beginning of this tutorial, the "GroupByQuery":GroupByQuery.html. It would be nice if we could group results by dimension value and somehow sort those results... and it turns out we can!

Let's create the file:
<pre>group_by_query.body</pre>
and put the following in there:
<pre><code>{
  "queryType": "groupBy",
  "dataSource": "twitterstream",
  "granularity": "all",
  "dimensions": ["htags"],
  "orderBy": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
  "aggregations":[
    { "type": "longSum", "fieldName": "tweets", "name": "tweets"}
  ],
  "filter": {"type": "selector", "dimension": "lang", "value": "en" },
  "intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code></pre>

Woah! Our query just got way more complicated. Now we have these "Filters":Filters.html things and this "OrderBy":OrderBy.html thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.

If you issue the query:
<pre><code>curl -X POST 'http://localhost:8080/druid/v2/?pretty' -H 'content-type: application/json' -d @group_by_query.body</code></pre>

You should hopefully see an answer to our question. For my twitter stream, it looks like this:

<pre><code>[ {
  "version" : "v1",
  "timestamp" : "2012-10-01T00:00:00.000Z",
  "event" : {
    "tweets" : 2660,
    "htags" : "android"
  }
}, {
  "version" : "v1",
  "timestamp" : "2012-10-01T00:00:00.000Z",
  "event" : {
    "tweets" : 1944,
    "htags" : "E3"
  }
}, {
  "version" : "v1",
  "timestamp" : "2012-10-01T00:00:00.000Z",
  "event" : {
    "tweets" : 1927,
    "htags" : "15SueñosPendientes"
  }
}, {
  "version" : "v1",
  "timestamp" : "2012-10-01T00:00:00.000Z",
  "event" : {
    "tweets" : 1717,
    "htags" : "ipad"
  }
}, {
  "version" : "v1",
  "timestamp" : "2012-10-01T00:00:00.000Z",
  "event" : {
    "tweets" : 1515,
    "htags" : "IDidntTextYouBackBecause"
  }
} ]
</code></pre>

Feel free to tweak other query parameters to answer other questions you may have about the data.
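For example, to see which languages tweet the most rather than which hash tags are popular, you could group on the "lang" dimension and drop the language filter, with something like:

<pre><code>{
  "queryType": "groupBy",
  "dataSource": "twitterstream",
  "granularity": "all",
  "dimensions": ["lang"],
  "orderBy": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
  "aggregations":[
    { "type": "longSum", "fieldName": "tweets", "name": "tweets"}
  ],
  "intervals":["2012-10-01T00:00/2020-01-01T00"]
}
</code></pre>
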

h2. Additional Information

This tutorial is merely showcasing a small fraction of what Druid can do. Next, continue on to "Loading Your Data":./Loading-Your-Data.html.

And thus concludes our journey! Hopefully you learned a thing or two about Druid real-time ingestion, querying Druid, and how Druid can be used to solve problems. If you have additional questions, feel free to post in our "Google Groups page":http://www.groups.google.com/forum/#!forum/druid-development.

@ -1,5 +1,5 @@
---
layout: doc_page
---
This page discusses how we do versioning and provides information on our stable releases.

@ -8,13 +8,13 @ Versioning Strategy

We generally follow [semantic versioning](http://semver.org/). The general idea is

* "Major" version (leftmost): backwards incompatible, no guarantees exist about APIs between the versions
* "Minor" version (middle number): you can move forward from a smaller number to a larger number, but moving backwards *might* be incompatible.
* "bug-fix" version ("patch" or the rightmost): Interchangeable. The higher the number, the more things are fixed (hopefully), but the programming interfaces are completely compatible and you should be able to just drop in a new jar and have it work.

Note that this is defined in terms of programming API, **not** in terms of functionality. It is possible that a brand new awesome way of doing something is introduced in a "bug-fix" release version if it doesn't add to the public API or change it.

One exception for right now: while we are still in major version 0, we are considering the APIs to be in beta and are conflating "major" and "minor", so a minor version increase could be backwards incompatible for as long as we are at major version 0. These will be communicated via email on the group.

For external deployments, we recommend running the stable release tag. Releases are considered stable after we have deployed them into our production environment and they have operated bug-free for some time.

@ -1,10 +1,10 @@
---
layout: doc_page
---
Druid uses ZooKeeper (ZK) for management of current cluster state. The operations that happen over ZK are

1. [Master](Master.html) leader election
2. Segment "publishing" protocol from [Compute](Compute.html) and [Realtime](Realtime.html)
3. Segment load/drop protocol between [Master](Master.html) and [Compute](Compute.html)

### Property Configuration
@ -13,45 +13,59 @ ZooKeeper paths are set via the `runtime.properties` configuration file. Druid w

There is a prefix path that is required and can be used as the only (well, kinda, see the note below) path-related zookeeper configuration parameter (everything else will be a default based on the prefix):

```
druid.zk.paths.base
```

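For instance, a cluster's `runtime.properties` might set only the prefix and let everything else default (the `/druid` value below is just an illustration, not a documented default):

```
druid.zk.paths.base=/druid
```
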
You can also override each individual path (defaults are shown below):

```
druid.zk.paths.propertiesPath=${druid.zk.paths.base}/properties
druid.zk.paths.announcementsPath=${druid.zk.paths.base}/announcements
druid.zk.paths.servedSegmentsPath=${druid.zk.paths.base}/servedSegments
druid.zk.paths.loadQueuePath=${druid.zk.paths.base}/loadQueue
druid.zk.paths.masterPath=${druid.zk.paths.base}/master
druid.zk.paths.indexer.announcementsPath=${druid.zk.paths.base}/indexer/announcements
druid.zk.paths.indexer.tasksPath=${druid.zk.paths.base}/indexer/tasks
druid.zk.paths.indexer.statusPath=${druid.zk.paths.base}/indexer/status
druid.zk.paths.indexer.leaderLatchPath=${druid.zk.paths.base}/indexer/leaderLatchPath
```

NOTE: We also use Curator's service discovery module to expose some services via zookeeper. This also uses a zookeeper path, but this path is **not** affected by `druid.zk.paths.base` and **must** be specified separately. This property is

```
druid.zk.paths.discoveryPath
```

### Master Leader Election

We use the Curator LeadershipLatch recipe to do leader election at path

```
${druid.zk.paths.masterPath}/_MASTER
```

### Segment "publishing" protocol from Compute and Realtime

The `announcementsPath` and `servedSegmentsPath` are used for this.

All [Compute](Compute.html) and [Realtime](Realtime.html) nodes publish themselves on the `announcementsPath`, specifically, they will create an ephemeral znode at

```
${druid.zk.paths.announcementsPath}/${druid.host}
```

Which signifies that they exist. They will also subsequently create a permanent znode at

```
${druid.zk.paths.servedSegmentsPath}/${druid.host}
```

And as they load up segments, they will attach ephemeral znodes that look like

```
${druid.zk.paths.servedSegmentsPath}/${druid.host}/_segment_identifier_
```

Nodes like the [Master](Master.html) and [Broker](Broker.html) can then watch these paths to see which nodes are currently serving which segments.

@ -61,6 +75,8 @ The `loadQueuePath` is used for this.

When the [Master](Master.html) decides that a [Compute](Compute.html) node should load or drop a segment, it writes an ephemeral znode to

```
${druid.zk.paths.loadQueuePath}/_host_of_compute_node/_segment_identifier
```

This node will contain a payload that indicates to the Compute node what it should do with the given segment. When the Compute node is done with the work, it will delete the znode in order to signify to the Master that it is complete.

@ -1,71 +0,0 @@
@@ -0,0 +1,70 @@
---
---

* [Introduction|Home](Introduction|Home.html)
* [Download](Download.html)
* [Support](Support.html)
* [Contribute](Contribute.html)
========================

Getting Started
* [Tutorial: A First Look at Druid](Tutorial:-A-First-Look-at-Druid.html)
* [Tutorial: The Druid Cluster](Tutorial:-The-Druid-Cluster.html)
* [Loading Your Data](Loading-Your-Data.html)
* [Querying Your Data](Querying-Your-Data.html)
* [Booting a Production Cluster](Booting-a-Production-Cluster.html)
* [Examples](Examples.html)
* [Cluster Setup](Cluster-Setup.html)
* [Configuration](Configuration.html)
--------------------------------------

Data Ingestion
* [Realtime](Realtime.html)
* [Batch|Batch Ingestion](Batch|Batch-Ingestion.html)
* [Indexing Service](Indexing-Service.html)
----------------------------

Querying
* [Querying](Querying.html)
*** ]
*** [Aggregations](Aggregations.html)
*** ]
*** [Granularities](Granularities.html)
* Query Types
*** ]
***** ]
***** ]
*** [SearchQuery](SearchQuery.html)
*** ]
** [SegmentMetadataQuery](SegmentMetadataQuery.html)
*** ]
*** [TimeseriesQuery](TimeseriesQuery.html)
---------------------------

Architecture
* [Design](Design.html)
* [Segments](Segments.html)
* Node Types
*** ]
*** [Broker](Broker.html)
*** ]
***** ]
*** [Realtime](Realtime.html)
*** ]
*** [Plumber](Plumber.html)
* External Dependencies
*** ]
*** [MySQL](MySQL.html)
*** ]
** [Concepts and Terminology](Concepts-and-Terminology.html)
-------------------------------

Development
* [Versioning](Versioning.html)
* [Build From Source](Build-From-Source.html)
* [Libraries](Libraries.html)
------------------------

Misc
* [Thanks](Thanks.html)
-------------
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@@ -1,592 +0,0 @@
|
||||||
@font-face {
|
|
||||||
font-family: 'Conv_framd';
|
|
||||||
src: url('../fonts/framd.eot');
|
|
||||||
src: url('../fonts/framd.eot?#iefix') format('embedded-opentype'),
|
|
||||||
url('../fonts/framd.woff') format('woff'),
|
|
||||||
url('../fonts/framd.ttf') format('truetype'),
|
|
||||||
url('../fonts/framd.svg#heroregular') format('svg');
|
|
||||||
font-weight: normal;
|
|
||||||
font-style: normal;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
html, body {
|
|
||||||
position:relative;
|
|
||||||
height:100%;
|
|
||||||
min-height:100%;
|
|
||||||
height:100%;
|
|
||||||
color:#252525;
|
|
||||||
font:400 18px/26px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
padding:0;
|
|
||||||
margin:0;
|
|
||||||
word-wrap:break-word;
|
|
||||||
}
|
|
||||||
a {
|
|
||||||
color:#6ab6dd;
|
|
||||||
position:relative;
|
|
||||||
}
|
|
||||||
a:hover {
|
|
||||||
text-decoration:underline;
|
|
||||||
color:#2c79a1;
|
|
||||||
}
|
|
||||||
.wrapper {
|
|
||||||
min-height:100%;
|
|
||||||
}
|
|
||||||
header {
|
|
||||||
margin:0 0 100px;
|
|
||||||
}
|
|
||||||
header .span12 {
|
|
||||||
padding:0 0 7px 0;
|
|
||||||
}
|
|
||||||
.logo.custom {
|
|
||||||
display:inline-block;
|
|
||||||
margin:0;
|
|
||||||
vertical-align:25px;
|
|
||||||
}
|
|
||||||
.logo.custom a {
|
|
||||||
background:url(../img/logo.png) no-repeat;
|
|
||||||
width: 110px;
|
|
||||||
height: 49px;
|
|
||||||
display:block;
|
|
||||||
text-indent:-9999px;
|
|
||||||
}
|
|
||||||
.custom.navbar {
|
|
||||||
margin:10px 0;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li {
|
|
||||||
padding:0 !important;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li a, .navbar .brand, .custom.navbar .nav li.pull-right span {
|
|
||||||
font:300 14px/20px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
padding:10px 10px;
|
|
||||||
}
|
|
||||||
.navbar .brand.active {
|
|
||||||
color:#fff;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li.pull-right span {
|
|
||||||
padding-right:0;
|
|
||||||
color:#e76d4c !important;
|
|
||||||
display:block;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li a.doc-link {
|
|
||||||
padding:5px 10px 0;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li a.doc-link span {
|
|
||||||
display:inline-block;
|
|
||||||
background:url(../img/icon-git.png) no-repeat;
|
|
||||||
width: 28px;
|
|
||||||
height: 28px;
|
|
||||||
vertical-align:-7px;
|
|
||||||
margin-left:5px;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li a.doc-link:hover span, .custom.navbar .nav li.active a.doc-link span {
|
|
||||||
background-position:0 -28px;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav {
|
|
||||||
float:none;
|
|
||||||
}
|
|
||||||
.navbar .nav > li.pull-right {
|
|
||||||
float:right;
|
|
||||||
padding:10px 0;
|
|
||||||
}
|
|
||||||
h1 {
|
|
||||||
font:300 48px/48px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0 0 45px;
|
|
||||||
}
|
|
||||||
h1.index {
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
h1 span {
|
|
||||||
display:block;
|
|
||||||
font:400 14px/28px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
}
|
|
||||||
h2 {
|
|
||||||
font:30px/30px 'Conv_framd', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0 0 20px;
|
|
||||||
color:#0f1e35;
|
|
||||||
}
|
|
||||||
h3 {
|
|
||||||
font:300 30px/36px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0 0 33px;
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
.btn {
|
|
||||||
display:block;
|
|
||||||
margin:0 auto 65px;
|
|
||||||
background:#6ab6dd;
|
|
||||||
border:none;
|
|
||||||
box-shadow:inset -3px -3px 3px #5592b1;
|
|
||||||
height:53px;
|
|
||||||
width:205px;
|
|
||||||
font:30px/53px 'Conv_framd', Arial, Helvetica, sans-serif;
|
|
||||||
color:#252424;
|
|
||||||
text-shadow:none;
|
|
||||||
padding:0;
|
|
||||||
z-index:100;
|
|
||||||
position:relative;
|
|
||||||
}
|
|
||||||
.btn a {
|
|
||||||
color:#252424;
|
|
||||||
}
|
|
||||||
.btn:hover {
|
|
||||||
background:#83c6e9;
|
|
||||||
}
|
|
||||||
.index-content {
|
|
||||||
margin:0 auto 60px;
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
.third {
|
|
||||||
margin:0 auto 35px;
|
|
||||||
}
|
|
||||||
.third-item {
|
|
||||||
text-align:left;
|
|
||||||
}
|
|
||||||
.third-item:first-child {
|
|
||||||
margin:0;
|
|
||||||
}
|
|
||||||
.third-item a {
|
|
||||||
display:block;
|
|
||||||
font-family:'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
font-weight:700;
|
|
||||||
font-size:30px;
|
|
||||||
margin:0 auto 20px;
|
|
||||||
color:#252424;
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
.container.custom {
|
|
||||||
padding:0;
|
|
||||||
margin:0 auto;
|
|
||||||
}
|
|
||||||
.container.custom.main-cont {
|
|
||||||
padding-bottom:230px;
|
|
||||||
}
|
|
||||||
.text-part {
|
|
||||||
padding-top:70px;
|
|
||||||
}
|
|
||||||
.row-fluid.index-page {
|
|
||||||
padding-top:100px;
|
|
||||||
}
|
|
||||||
.index-page .content {
|
|
||||||
padding:15px 0 0;
|
|
||||||
}
|
|
||||||
.index-page h3 {
|
|
||||||
text-align:left;
|
|
||||||
}
|
|
||||||
.index-page .sidebar {
|
|
||||||
padding:65px 0 30px;
|
|
||||||
}
|
|
||||||
.container.custom p {
|
|
||||||
margin:0 0 17px;
|
|
||||||
}
|
|
||||||
.homepage .index-page .content h2 {
|
|
||||||
margin:0 0 20px;
|
|
||||||
}
|
|
||||||
.container.custom .unstyled {
|
|
||||||
margin:0;
|
|
||||||
color:#353535;
|
|
||||||
}
|
|
||||||
.container.custom .unstyled li {
|
|
||||||
margin:0 0 17px;
|
|
||||||
}
|
|
||||||
.container.custom .unstyled li span {
|
|
||||||
font-family:'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
font-weight:700;
|
|
||||||
display:block;
|
|
||||||
}
|
|
||||||
.container.custom .unstyled li a {
|
|
||||||
display:inline;
|
|
||||||
}
|
|
||||||
.homepage h4 {
|
|
||||||
font:24px/24px 'Conv_framd', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0 0 15px;
|
|
||||||
}
|
|
||||||
.container.custom .sidebar .unstyled {
|
|
||||||
margin:0 0 100px;
|
|
||||||
}
|
|
||||||
.container.custom .sidebar .unstyled li {
|
|
||||||
margin:0 0 12px;
|
|
||||||
border-bottom:1px solid #adadad;
|
|
||||||
padding: 5px 7px;
|
|
||||||
}
|
|
||||||
.grey-box {
|
|
||||||
background:#e5e4e3;
|
|
||||||
border-radius:3px;
|
|
||||||
-moz-border-radius:3px;
|
|
||||||
-webkit-border-radius:3px;
|
|
||||||
position:relative;
|
|
||||||
padding:20px 10px 130px;
|
|
||||||
color:#000;
|
|
||||||
}
|
|
||||||
footer {
|
|
||||||
text-align:center;
|
|
||||||
font-size:14px;
|
|
||||||
color:#000;
|
|
||||||
margin:-135px 0 0;
|
|
||||||
}
|
|
||||||
footer .container.custom {
|
|
||||||
border-top:1px solid #e1e1e1;
|
|
||||||
padding:20px 0 25px;
|
|
||||||
}
|
|
||||||
footer .span9 {
|
|
||||||
text-align:left;
|
|
||||||
}
|
|
||||||
footer .container.custom ul.unstyled {
|
|
||||||
display:inline-block;
|
|
||||||
margin:0 120px 30px 30px;
|
|
||||||
text-align:left;
|
|
||||||
vertical-align:top;
|
|
||||||
}
|
|
||||||
footer .container.custom ul.unstyled li {
|
|
||||||
font:300 14px/26px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0;
|
|
||||||
}
|
|
||||||
footer .container.custom .unstyled li a {
|
|
||||||
color:#000;
|
|
||||||
font-weight:300;
|
|
||||||
}
|
|
||||||
footer .container.custom .unstyled li:first-child a {
|
|
||||||
font-weight:400;
|
|
||||||
}
|
|
||||||
footer ul li a:hover {
|
|
||||||
text-decoration:underline;
|
|
||||||
color:#fff;
|
|
||||||
}
|
|
||||||
footer .logo-block {
|
|
||||||
text-align:right;
|
|
||||||
}
|
|
||||||
footer .container.custom p {
|
|
||||||
display:inline-block;
|
|
||||||
margin:28px 0 0 10px;
|
|
||||||
text-align:left;
|
|
||||||
}
|
|
||||||
.contact-item {
|
|
||||||
margin:0 0 30px 30px;
|
|
||||||
text-align:left;
|
|
||||||
font-weight:300;
|
|
||||||
}
|
|
||||||
.contact-item a {
|
|
||||||
color:#000;
|
|
||||||
}
|
|
||||||
footer .contact-item span {
|
|
||||||
font-weight:400;
|
|
||||||
display:block;
|
|
||||||
}
|
|
||||||
footer .contact-item:first-child span {
|
|
||||||
text-transform:uppercase;
|
|
||||||
}
|
|
||||||
footer .span4 {
|
|
||||||
text-align:left;
|
|
||||||
}
|
|
||||||
footer .span5 {
|
|
||||||
padding-top: 75px;
|
|
||||||
}
|
|
||||||
.soc {
|
|
||||||
text-align:left;
|
|
||||||
margin:5px 0 0 0;
|
|
||||||
}
|
|
||||||
.soc a {
|
|
||||||
display:inline-block;
|
|
||||||
width:35px;
|
|
||||||
height:34px;
|
|
||||||
background:url(../img/icons-soc.png) no-repeat;
|
|
||||||
}
|
|
||||||
.soc a.github {
|
|
||||||
background-position:0 -34px;
|
|
||||||
}
|
|
||||||
.soc a.meet {
|
|
||||||
background-position:0 -68px;
|
|
||||||
}
|
|
||||||
.soc a.rss {
|
|
||||||
background-position:0 -102px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.text-item {
|
|
||||||
margin:0 0 75px;
|
|
||||||
}
|
|
||||||
.container.custom p.note {
|
|
||||||
text-align:center;
|
|
||||||
padding:30px 0 0;
|
|
||||||
}
|
|
||||||
.text-item strong {
|
|
||||||
font-weight:normal;
|
|
||||||
font-family:'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
font-weight:700;
|
|
||||||
}
|
|
||||||
h2.date {
|
|
||||||
font-family:'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
font-weight:400;
|
|
||||||
}
|
|
||||||
.blog h2.date {
|
|
||||||
margin:0 0 25px;
|
|
||||||
}
|
|
||||||
h2.date span {
|
|
||||||
display:block;
|
|
||||||
margin:0 0 5px;
|
|
||||||
padding:0 0 15px;
|
|
||||||
font-size:20px;
|
|
||||||
border-bottom:1px solid #ccc;
|
|
||||||
}
|
|
||||||
.blog h2.date a {
|
|
||||||
font-weight:700;
|
|
||||||
}
|
|
||||||
.blog.inner h2.date span:first-child {
|
|
||||||
display:block;
|
|
||||||
font-size:30px;
|
|
||||||
font-weight:700;
|
|
||||||
padding:0;
|
|
||||||
border:none;
|
|
||||||
}
|
|
||||||
.blog.inner h3 {
|
|
||||||
text-align:left;
|
|
||||||
font-size:25px;
|
|
||||||
font-weight:700;
|
|
||||||
margin:0 0 15px;
|
|
||||||
}
|
|
||||||
.blog.inner ul li {
|
|
||||||
margin-left: 50px;
|
|
||||||
line-height:26px;
|
|
||||||
}
|
|
||||||
.recent h3 {
|
|
||||||
font-size: 25px;
|
|
||||||
font-weight: 700;
|
|
||||||
margin: 0 0 15px;
|
|
||||||
text-align: left;
|
|
||||||
}
|
|
||||||
.recent ul li.active a {
|
|
||||||
color:#252525;
|
|
||||||
}
|
|
||||||
.border {
|
|
||||||
width:130px;
|
|
||||||
margin: 45px auto;
|
|
||||||
border-top:1px solid #dfdfdf;
|
|
||||||
border-top:1px solid #81807f;
|
|
||||||
}
|
|
||||||
.text-img {
|
|
||||||
display:block;
|
|
||||||
margin:0 auto 17px;
|
|
||||||
}
|
|
||||||
.indent p, .indent ul {
|
|
||||||
padding:0 0 0 50px;
|
|
||||||
}
|
|
||||||
.span3 {
|
|
||||||
margin-left:0;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav {
|
|
||||||
border:1px solid #e5e5e5;
|
|
||||||
border-radius:5px;
|
|
||||||
box-shadow:0 0 3px #f9f9f9;
|
|
||||||
padding:0;
|
|
||||||
width:auto;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav li {
|
|
||||||
border-top:1px solid #e5e5e5;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav li:first-child {
|
|
||||||
border:none;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav li:first-child a {
|
|
||||||
border-radius:5px 5px 0 0;
|
|
||||||
-moz-border-radius:5px 5px 0 0;
|
|
||||||
-webkit-border-radius:5px 5px 0 0;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav li:last-child, .nav.nav-list.bs-docs-sidenav li:last-child a {
|
|
||||||
border-radius:0 0 5px 5px;
|
|
||||||
-moz-border-radius:0 0 5px 5px;
|
|
||||||
-webkit-border-radius:0 0 5px 5px;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav li a {
|
|
||||||
padding:10px;
|
|
||||||
margin:0;
|
|
||||||
font-weight:400;
|
|
||||||
font-size:14px;
|
|
||||||
line-height:18px;
|
|
||||||
}
|
|
||||||
.icon-chevron-right {
|
|
||||||
float: right;
|
|
||||||
margin-right: -6px;
|
|
||||||
margin-top: 2px;
|
|
||||||
opacity: 0.25;
|
|
||||||
}
|
|
||||||
.indent ul li {
|
|
||||||
line-height:26px;
|
|
||||||
}
|
|
||||||
.span8 h3 {
|
|
||||||
text-align:left;
|
|
||||||
margin:0 0 50px;
|
|
||||||
}
|
|
||||||
.span8 h3 a {
|
|
||||||
font-weight:800;
|
|
||||||
}
|
|
||||||
.span8 h4 {
|
|
||||||
font:700 18px/26px 'Open Sans', Arial, Helvetica, sans-serif;
|
|
||||||
margin:0 0 20px;
|
|
||||||
}
|
|
||||||
.span8 p span {
|
|
||||||
font-weight:700;
|
|
||||||
}
|
|
||||||
header.index-head {
|
|
||||||
background:#f9f9f9;
|
|
||||||
margin:0 0 30px;
|
|
||||||
}
|
|
||||||
header.index-head .span12 {
|
|
||||||
margin-bottom:80px;
|
|
||||||
}
|
|
||||||
.index-content h2 {
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
.third-item img {
|
|
||||||
display:block;
|
|
||||||
margin:0 auto 70px;
|
|
||||||
}
|
|
||||||
.container.custom .third-item p {
|
|
||||||
margin:0 0 0 20px;
|
|
||||||
}
|
|
||||||
.row-fluid {
|
|
||||||
margin:0;
|
|
||||||
padding:0;
|
|
||||||
}
|
|
||||||
.nav-list [class^="icon-"] {
|
|
||||||
margin-right:-2px;
|
|
||||||
}
|
|
||||||
@media (min-width: 1200px) {
|
|
||||||
.custom.navbar .nav li a, .navbar .brand, .custom.navbar .nav li.pull-right span {
|
|
||||||
padding: 10px 20px;
|
|
||||||
font-size:16px;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav {
|
|
||||||
width:258px;
|
|
||||||
}
|
|
||||||
.container.custom .recent ul.unstyled {
|
|
||||||
margin-right:100px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@media (max-width: 980px) {
|
|
||||||
.container.custom {
|
|
||||||
width:95%;
|
|
||||||
}
|
|
||||||
.bs-docs-sidenav.affix {
|
|
||||||
position: static;
|
|
||||||
top: 0;
|
|
||||||
width: 100%;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav {
|
|
||||||
width:100%;
|
|
||||||
margin-bottom:20px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@media only screen
|
|
||||||
and (min-device-width : 710px)
|
|
||||||
and (max-device-width : 770px) {
|
|
||||||
.container.custom {
|
|
||||||
width:700px;
|
|
||||||
position:relative;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li {
|
|
||||||
font-size:22px;
|
|
||||||
padding:0 10px;
|
|
||||||
}
|
|
||||||
.nav.nav-list.bs-docs-sidenav.affix {
|
|
||||||
position:fixed;
|
|
||||||
top:175px;
|
|
||||||
width:218px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@media only screen and (min-device-width : 770px)
|
|
||||||
and (max-device-width : 810px) {
|
|
||||||
.container.custom {
|
|
||||||
width:700px;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav li {
|
|
||||||
font-size:15px;
|
|
||||||
padding:0 15px;
|
|
||||||
}
|
|
||||||
.custom.navbar .nav {
|
|
||||||
margin-left:30px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@media only screen
|
|
||||||
and (min-device-width : 320px)
|
|
||||||
and (max-device-width : 480px) {
|
|
||||||
.container.custom {
|
|
||||||
width:100%;
|
|
||||||
margin:0;
|
|
||||||
}
|
|
||||||
footer .logo-block {
|
|
||||||
text-align: left;
|
|
||||||
padding-left:30px;
|
|
||||||
}
|
|
||||||
.offset1, .row-fluid .offset1:first-child {
|
|
||||||
margin:0;
|
|
||||||
}
|
|
||||||
.indent p, .indent ul {
|
|
||||||
padding:0;
|
|
||||||
}
|
|
||||||
.indent, .blog, .recent h3, .recent ul, .text-part {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
.index-head h3 {
|
|
||||||
font-size:20px;
|
|
||||||
}
|
|
||||||
.index-content {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
.content h2, .content h3, .content ul.unstyled, .sidebar h3 {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
h1 {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@media (max-width: 320px) {
|
|
||||||
.container.custom {
|
|
||||||
width:100%;
|
|
||||||
}
|
|
||||||
footer .logo-block {
|
|
||||||
text-align: left;
|
|
||||||
padding-left:30px;
|
|
||||||
}
|
|
||||||
footer .container.custom p {
|
|
||||||
margin-top:10px;
|
|
||||||
}
|
|
||||||
.offset1, .row-fluid .offset1:first-child {
|
|
||||||
margin:0;
|
|
||||||
}
|
|
||||||
.indent p, .indent ul {
|
|
||||||
padding:0;
|
|
||||||
}
|
|
||||||
.indent, .blog, .recent h3, .recent ul, .text-part {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
.index-head h3 {
|
|
||||||
font-size:25px;
|
|
||||||
line-height:30px;
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
.index-content {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
.content h2, .content h3, .content ul.unstyled, .sidebar h3 {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
h1 {
|
|
||||||
padding:0 20px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.container.custom .faq-page p {
|
|
||||||
margin-bottom:10px;
|
|
||||||
}
|
|
||||||
.index-head h3 {
|
|
||||||
margin-bottom:50px;
|
|
||||||
}
|
|
||||||
h1.center {
|
|
||||||
text-align:center;
|
|
||||||
}
|
|
||||||
.btn.btn-navbar {
|
|
||||||
height:auto;
|
|
||||||
width:auto;
|
|
||||||
margin:10px 0 0;
|
|
||||||
}
|
|
||||||
.navbar-inner {
|
|
||||||
z-index:100000;
|
|
||||||
position:relative;
|
|
||||||
}
|
|
|
@@ -1,147 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8" />
|
|
||||||
<title>Druid | {{page.title}}</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="/css/bootstrap.css" media="all" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="/css/bootstrap-responsive.css" media="all" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="/css/syntax.css" media="all" />
|
|
||||||
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
|
|
||||||
<link rel="stylesheet" type="text/css" href="/css/custom.css" media="all" />
|
|
||||||
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
|
|
||||||
<script src="http://code.jquery.com/jquery.js"></script>
|
|
||||||
<script src="/js/bootstrap.min.js"></script>
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<div class="wrapper">
|
|
||||||
<header{% if page.id == 'home' %} class="index-head"{% endif %}>
|
|
||||||
<div class="container custom">
|
|
||||||
<div class="row-fluid">
|
|
||||||
<div class="span12">
|
|
||||||
<div class="navbar navbar-inverse custom">
|
|
||||||
<div class="navbar-inner">
|
|
||||||
<button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
|
|
||||||
<span class="icon-bar"></span>
|
|
||||||
<span class="icon-bar"></span>
|
|
||||||
<span class="icon-bar"></span>
|
|
||||||
</button>
|
|
||||||
<a class="brand {% if page.id == 'home' %}active{% endif %}" href="/">Home</a>
|
|
||||||
<div class="nav-collapse collapse">
|
|
||||||
<ul class="nav">
|
|
||||||
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}>
|
|
||||||
<a href="/druid.html">What is Druid?</a>
|
|
||||||
</li>
|
|
||||||
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}>
|
|
||||||
<a href="/downloads.html">Downloads</a>
|
|
||||||
</li>
|
|
||||||
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}>
|
|
||||||
<a class="doc-link" target="_blank" href="https://github.com/metamx/druid/wiki">Documentation <span></span></a>
|
|
||||||
</li>
|
|
||||||
<li {% if page.sectionid == 'community' %} class="active"{% endif %}>
|
|
||||||
<a href="/community.html">Community</a>
|
|
||||||
</li>
|
|
||||||
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}>
|
|
||||||
<a href="/faq.html">FAQ</a>
|
|
||||||
</li>
|
|
||||||
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}>
|
|
||||||
<a href="/blog">Blog</a>
|
|
||||||
</li>
|
|
||||||
<li class="pull-right">
|
|
||||||
<span>BETA</span>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{% if page.id == 'home' %}
|
|
||||||
<h3>Druid is open-source infrastructure for real²time exploratory analytics on large datasets.</h3>
|
|
||||||
<button class="btn" type="button"><a href="downloads.html">Download</a></button>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
</header>
|
|
||||||
<div class="container custom main-cont">
|
|
||||||
|
|
||||||
{{ content }}
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
<div class="container custom">
|
|
||||||
<div class="row-fluid">
|
|
||||||
<div class="span3">
|
|
||||||
<div class="contact-item">
|
|
||||||
<span>CONTACT US</span>
|
|
||||||
<a href="mailto:info@druid.io">info@druid.io</a>
|
|
||||||
</div>
|
|
||||||
<div class="contact-item">
|
|
||||||
<span>Metamarkets</span>
|
|
||||||
625 2nd Street, Suite #230<br/>
|
|
||||||
San Francisco, CA 94017
|
|
||||||
<div class="soc">
|
|
||||||
<a href="https://twitter.com/druidio"></a>
|
|
||||||
<a href="https://github.com/metamx/druid" class="github"></a>
|
|
||||||
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
|
|
||||||
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="span9">
|
|
||||||
<ul class="unstyled">
|
|
||||||
<li>
|
|
||||||
<a href="/">DRUID</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/druid.html">What is Druid?</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/downloads.html">Downloads</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a target="_blank" href="https://github.com/metamx/druid/wiki">Documentation </a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<ul class="unstyled">
|
|
||||||
<li>
|
|
||||||
<a href="/community.html">SUPPORT</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/community.html">Community</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/faq.html">FAQ</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/licensing.html">Licensing</a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<ul class="unstyled">
|
|
||||||
<li>
|
|
||||||
<a href="/blog">BLOG</a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<div class="logo-block">
|
|
||||||
<span class="logo custom">
|
|
||||||
<a href="/"></a>
|
|
||||||
</span>
|
|
||||||
<p>is an open source project sponsored by<br/> Metamarkets.</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</footer>
|
|
||||||
<script type="text/javascript">
|
|
||||||
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
|
||||||
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
|
||||||
</script>
|
|
||||||
<script type="text/javascript">
|
|
||||||
try {
|
|
||||||
var pageTracker = _gat._getTracker("UA-40280432-1");
|
|
||||||
pageTracker._trackPageview();
|
|
||||||
} catch(err) {}
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
|
@@ -0,0 +1,56 @@
.doc-content pre, .doc-content pre code {
  overflow: auto;
  white-space: pre;
  word-wrap: normal;
}

.doc-content p {
  margin: 18px 0 18px 0;
}

/*** HACK: This is a horrible hack, but I have no clue why images don't want to stay in the container **/
.doc-content img {
  max-width: 847.5px;
}

.doc-content code {
  background-color: #e0e0e0;
}

.doc-content pre code {
  background-color: transparent;
}

.doc-content table,
.doc-content pre {
  margin: 35px 0 35px 0;
}

.doc-content table,
.doc-content table > thead > tr > th,
.doc-content table > tbody > tr > th,
.doc-content table > tfoot > tr > th,
.doc-content table > thead > tr > td,
.doc-content table > tbody > tr > td,
.doc-content table > tfoot > tr > td {
  border: 1px solid #dddddd;
}

.doc-content table > thead > tr > th,
.doc-content table > thead > tr > td {
  border-bottom-width: 2px;
}

.doc-content table > tbody > tr:nth-child(odd) > td,
.doc-content table > tbody > tr:nth-child(odd) > th {
  background-color: #f9f9f9;
}

.doc-content table > tbody > tr:hover > td,
.doc-content table > tbody > tr:hover > th {
  background-color: #d5d5d5;
}

.doc-content table code {
  background-color: transparent;
}
@@ -1,8 +0,0 @@
---
layout: default
---
<div class="row-fluid">

{{ content }}

</div>
@@ -1,160 +0,0 @@
|
||||||
/*****************************************************************************/
|
|
||||||
/*
|
|
||||||
/* Common
|
|
||||||
/*
|
|
||||||
/*****************************************************************************/
|
|
||||||
|
|
||||||
/* Global Reset */
|
|
||||||
* {
|
|
||||||
margin: 0;
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
html, body { height: 100%; }
|
|
||||||
|
|
||||||
body {
|
|
||||||
background-color: #FFF;
|
|
||||||
font: 13.34px Helvetica, Arial, sans-serif;
|
|
||||||
font-size: small;
|
|
||||||
text-align: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
h1, h2, h3, h4, h5, h6 {
|
|
||||||
font-size: 100%; }
|
|
||||||
|
|
||||||
h1 { margin-bottom: 1em; }
|
|
||||||
p { margin: 1em 0; }
|
|
||||||
|
|
||||||
a { color: #00a; }
|
|
||||||
a:hover { color: #000; }
|
|
||||||
a:visited { color: #a0a; }
|
|
||||||
|
|
||||||
/*****************************************************************************/
|
|
||||||
/*
|
|
||||||
/* Home
|
|
||||||
/*
|
|
||||||
/*****************************************************************************/
|
|
||||||
ul.posts {
|
|
||||||
list-style-type: none;
|
|
||||||
margin-bottom: 2em;
|
|
||||||
}
|
|
||||||
|
|
||||||
ul.posts li {
|
|
||||||
line-height: 1.75em;
|
|
||||||
}
|
|
||||||
|
|
||||||
ul.posts span {
|
|
||||||
color: #aaa;
|
|
||||||
font-family: Monaco, "Courier New", monospace;
|
|
||||||
font-size: 80%;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*****************************************************************************/
|
|
||||||
/*
|
|
||||||
/* Site
|
|
||||||
/*
|
|
||||||
/*****************************************************************************/
|
|
||||||
|
|
||||||
.site {
|
|
||||||
font-size: 115%;
|
|
||||||
text-align: justify;
|
|
||||||
width: 42em;
|
|
||||||
margin: 3em auto 2em;
|
|
||||||
line-height: 1.5em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header a {
|
|
||||||
font-weight: bold;
|
|
||||||
text-decoration: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header h1.title {
|
|
||||||
display: inline-block;
|
|
||||||
margin-bottom: 2em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header h1.title a {
|
|
||||||
color: #a00;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header h1.title a:hover {
|
|
||||||
color: #000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header a.extra {
|
|
||||||
color: #aaa;
|
|
||||||
margin-left: 1em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .header a.extra:hover {
|
|
||||||
color: #000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .meta {
|
|
||||||
color: #aaa;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .footer {
|
|
||||||
font-size: 80%;
|
|
||||||
color: #666;
|
|
||||||
border-top: 4px solid #eee;
|
|
||||||
margin-top: 2em;
|
|
||||||
overflow: hidden;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .footer .contact {
|
|
||||||
float: left;
|
|
||||||
margin-right: 3em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .footer .contact a {
|
|
||||||
color: #8085C1;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .footer .rss {
|
|
||||||
margin-top: 1.1em;
|
|
||||||
margin-right: -.2em;
|
|
||||||
float: right;
|
|
||||||
}
|
|
||||||
|
|
||||||
.site .footer .rss img {
|
|
||||||
border: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*****************************************************************************/
|
|
||||||
/*
|
|
||||||
/* Posts
|
|
||||||
/*
|
|
||||||
/*****************************************************************************/
|
|
||||||
|
|
||||||
/* standard */
|
|
||||||
.post pre {
|
|
||||||
border: 1px solid #ddd;
|
|
||||||
background-color: #eef;
|
|
||||||
padding: 0 .4em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.post ul, .post ol {
|
|
||||||
margin-left: 1.35em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.post code {
|
|
||||||
border: 1px solid #ddd;
|
|
||||||
background-color: #eef;
|
|
||||||
padding: 0 .2em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.post pre code {
|
|
||||||
border: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* terminal */
|
|
||||||
.post pre.terminal {
|
|
||||||
border: 1px solid #000;
|
|
||||||
background-color: #333;
|
|
||||||
color: #FFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
.post pre.terminal code {
|
|
||||||
background-color: #333;
|
|
||||||
}
|
|
|
@@ -1,11 +0,0 @@
---
layout: default
---

<div class="row-fluid">
<div class="span10 offset1{% if page.id != 'home' %} simple-page{% endif %}{% if page.sectionid == 'faq' %} faq-page{% endif %}">

{{ content }}

</div>
</div>
@@ -1,96 +0,0 @@
|
||||||
<!--
|
|
||||||
PIE: CSS3 rendering for IE
|
|
||||||
Version 1.0.0
|
|
||||||
http://css3pie.com
|
|
||||||
Dual-licensed for use under the Apache License Version 2.0 or the General Public License (GPL) Version 2.
|
|
||||||
-->
|
|
||||||
<PUBLIC:COMPONENT lightWeight="true">
|
|
||||||
<!-- saved from url=(0014)about:internet -->
|
|
||||||
<PUBLIC:ATTACH EVENT="oncontentready" FOR="element" ONEVENT="init()" />
|
|
||||||
<PUBLIC:ATTACH EVENT="ondocumentready" FOR="element" ONEVENT="init()" />
|
|
||||||
<PUBLIC:ATTACH EVENT="ondetach" FOR="element" ONEVENT="cleanup()" />
|
|
||||||
|
|
||||||
<script type="text/javascript">
|
|
||||||
var doc = element.document;var f=window.PIE;
|
|
||||||
if(!f){f=window.PIE={F:"-pie-",nb:"Pie",La:"pie_",Ac:{TD:1,TH:1},cc:{TABLE:1,THEAD:1,TBODY:1,TFOOT:1,TR:1,INPUT:1,TEXTAREA:1,SELECT:1,OPTION:1,IMG:1,HR:1},fc:{A:1,INPUT:1,TEXTAREA:1,SELECT:1,BUTTON:1},Gd:{submit:1,button:1,reset:1},aa:function(){}};try{doc.execCommand("BackgroundImageCache",false,true)}catch(aa){}for(var ba=4,Z=doc.createElement("div"),ca=Z.getElementsByTagName("i"),ga;Z.innerHTML="<!--[if gt IE "+ ++ba+"]><i></i><![endif]--\>",ca[0];);f.O=ba;if(ba===6)f.F=f.F.replace(/^-/,"");f.ja=
|
|
||||||
doc.documentMode||f.O;Z.innerHTML='<v:shape adj="1"/>';ga=Z.firstChild;ga.style.behavior="url(#default#VML)";f.zc=typeof ga.adj==="object";(function(){var a,b=0,c={};f.p={Za:function(d){if(!a){a=doc.createDocumentFragment();a.namespaces.add("css3vml","urn:schemas-microsoft-com:vml")}return a.createElement("css3vml:"+d)},Ba:function(d){return d&&d._pieId||(d._pieId="_"+ ++b)},Eb:function(d){var e,g,j,i,h=arguments;e=1;for(g=h.length;e<g;e++){i=h[e];for(j in i)if(i.hasOwnProperty(j))d[j]=i[j]}return d},
|
|
||||||
Rb:function(d,e,g){var j=c[d],i,h;if(j)Object.prototype.toString.call(j)==="[object Array]"?j.push([e,g]):e.call(g,j);else{h=c[d]=[[e,g]];i=new Image;i.onload=function(){j=c[d]={h:i.width,f:i.height};for(var k=0,n=h.length;k<n;k++)h[k][0].call(h[k][1],j);i.onload=null};i.src=d}}}})();f.Na={gc:function(a,b,c,d){function e(){k=j>=90&&j<270?b:0;n=j<180?c:0;m=b-k;p=c-n}function g(){for(;j<0;)j+=360;j%=360}var j=d.sa;d=d.zb;var i,h,k,n,m,p,r,t;if(d){d=d.coords(a,b,c);i=d.x;h=d.y}if(j){j=j.jd();g();e();
|
|
||||||
if(!d){i=k;h=n}d=f.Na.tc(i,h,j,m,p);a=d[0];d=d[1]}else if(d){a=b-i;d=c-h}else{i=h=a=0;d=c}r=a-i;t=d-h;if(j===void 0){j=!r?t<0?90:270:!t?r<0?180:0:-Math.atan2(t,r)/Math.PI*180;g();e()}return{sa:j,xc:i,yc:h,td:a,ud:d,Wd:k,Xd:n,rd:m,sd:p,kd:r,ld:t,rc:f.Na.dc(i,h,a,d)}},tc:function(a,b,c,d,e){if(c===0||c===180)return[d,b];else if(c===90||c===270)return[a,e];else{c=Math.tan(-c*Math.PI/180);a=c*a-b;b=-1/c;d=b*d-e;e=b-c;return[(d-a)/e,(c*d-b*a)/e]}},dc:function(a,b,c,d){a=c-a;b=d-b;return Math.abs(a===0?
|
|
||||||
b:b===0?a:Math.sqrt(a*a+b*b))}};f.ea=function(){this.Gb=[];this.oc={}};f.ea.prototype={ba:function(a){var b=f.p.Ba(a),c=this.oc,d=this.Gb;if(!(b in c)){c[b]=d.length;d.push(a)}},Ha:function(a){a=f.p.Ba(a);var b=this.oc;if(a&&a in b){delete this.Gb[b[a]];delete b[a]}},xa:function(){for(var a=this.Gb,b=a.length;b--;)a[b]&&a[b]()}};f.Oa=new f.ea;f.Oa.Rd=function(){var a=this,b;if(!a.Sd){b=doc.documentElement.currentStyle.getAttribute(f.F+"poll-interval")||250;(function c(){a.xa();setTimeout(c,b)})();
|
|
||||||
a.Sd=1}};(function(){function a(){f.L.xa();window.detachEvent("onunload",a);window.PIE=null}f.L=new f.ea;window.attachEvent("onunload",a);f.L.ta=function(b,c,d){b.attachEvent(c,d);this.ba(function(){b.detachEvent(c,d)})}})();f.Qa=new f.ea;f.L.ta(window,"onresize",function(){f.Qa.xa()});(function(){function a(){f.mb.xa()}f.mb=new f.ea;f.L.ta(window,"onscroll",a);f.Qa.ba(a)})();(function(){function a(){c=f.kb.md()}function b(){if(c){for(var d=0,e=c.length;d<e;d++)f.attach(c[d]);c=0}}var c;if(f.ja<9){f.L.ta(window,
|
|
||||||
"onbeforeprint",a);f.L.ta(window,"onafterprint",b)}})();f.lb=new f.ea;f.L.ta(doc,"onmouseup",function(){f.lb.xa()});f.he=function(){function a(h){this.Y=h}var b=doc.createElement("length-calc"),c=doc.body||doc.documentElement,d=b.style,e={},g=["mm","cm","in","pt","pc"],j=g.length,i={};d.position="absolute";d.top=d.left="-9999px";for(c.appendChild(b);j--;){d.width="100"+g[j];e[g[j]]=b.offsetWidth/100}c.removeChild(b);d.width="1em";a.prototype={Kb:/(px|em|ex|mm|cm|in|pt|pc|%)$/,ic:function(){var h=
|
|
||||||
this.Jd;if(h===void 0)h=this.Jd=parseFloat(this.Y);return h},yb:function(){var h=this.ae;if(!h)h=this.ae=(h=this.Y.match(this.Kb))&&h[0]||"px";return h},a:function(h,k){var n=this.ic(),m=this.yb();switch(m){case "px":return n;case "%":return n*(typeof k==="function"?k():k)/100;case "em":return n*this.xb(h);case "ex":return n*this.xb(h)/2;default:return n*e[m]}},xb:function(h){var k=h.currentStyle.fontSize,n,m;if(k.indexOf("px")>0)return parseFloat(k);else if(h.tagName in f.cc){m=this;n=h.parentNode;
|
|
||||||
return f.n(k).a(n,function(){return m.xb(n)})}else{h.appendChild(b);k=b.offsetWidth;b.parentNode===h&&h.removeChild(b);return k}}};f.n=function(h){return i[h]||(i[h]=new a(h))};return a}();f.Ja=function(){function a(e){this.X=e}var b=f.n("50%"),c={top:1,center:1,bottom:1},d={left:1,center:1,right:1};a.prototype={zd:function(){if(!this.ac){var e=this.X,g=e.length,j=f.v,i=j.qa,h=f.n("0");i=i.na;h=["left",h,"top",h];if(g===1){e.push(new j.ob(i,"center"));g++}if(g===2){i&(e[0].k|e[1].k)&&e[0].d in c&&
|
|
||||||
e[1].d in d&&e.push(e.shift());if(e[0].k&i)if(e[0].d==="center")h[1]=b;else h[0]=e[0].d;else if(e[0].W())h[1]=f.n(e[0].d);if(e[1].k&i)if(e[1].d==="center")h[3]=b;else h[2]=e[1].d;else if(e[1].W())h[3]=f.n(e[1].d)}this.ac=h}return this.ac},coords:function(e,g,j){var i=this.zd(),h=i[1].a(e,g);e=i[3].a(e,j);return{x:i[0]==="right"?g-h:h,y:i[2]==="bottom"?j-e:e}}};return a}();f.Ka=function(){function a(b,c){this.h=b;this.f=c}a.prototype={a:function(b,c,d,e,g){var j=this.h,i=this.f,h=c/d;e=e/g;if(j===
|
|
||||||
"contain"){j=e>h?c:d*e;i=e>h?c/e:d}else if(j==="cover"){j=e<h?c:d*e;i=e<h?c/e:d}else if(j==="auto"){i=i==="auto"?g:i.a(b,d);j=i*e}else{j=j.a(b,c);i=i==="auto"?j/e:i.a(b,d)}return{h:j,f:i}}};a.Kc=new a("auto","auto");return a}();f.Ec=function(){function a(b){this.Y=b}a.prototype={Kb:/[a-z]+$/i,yb:function(){return this.ad||(this.ad=this.Y.match(this.Kb)[0].toLowerCase())},jd:function(){var b=this.Vc,c;if(b===undefined){b=this.yb();c=parseFloat(this.Y,10);b=this.Vc=b==="deg"?c:b==="rad"?c/Math.PI*180:
|
|
||||||
b==="grad"?c/400*360:b==="turn"?c*360:0}return b}};return a}();f.Jc=function(){function a(c){this.Y=c}var b={};a.Qd=/\s*rgba\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d+|\d*\.\d+)\s*\)\s*/;a.Fb={aliceblue:"F0F8FF",antiquewhite:"FAEBD7",aqua:"0FF",aquamarine:"7FFFD4",azure:"F0FFFF",beige:"F5F5DC",bisque:"FFE4C4",black:"000",blanchedalmond:"FFEBCD",blue:"00F",blueviolet:"8A2BE2",brown:"A52A2A",burlywood:"DEB887",cadetblue:"5F9EA0",chartreuse:"7FFF00",chocolate:"D2691E",coral:"FF7F50",cornflowerblue:"6495ED",
|
|
||||||
cornsilk:"FFF8DC",crimson:"DC143C",cyan:"0FF",darkblue:"00008B",darkcyan:"008B8B",darkgoldenrod:"B8860B",darkgray:"A9A9A9",darkgreen:"006400",darkkhaki:"BDB76B",darkmagenta:"8B008B",darkolivegreen:"556B2F",darkorange:"FF8C00",darkorchid:"9932CC",darkred:"8B0000",darksalmon:"E9967A",darkseagreen:"8FBC8F",darkslateblue:"483D8B",darkslategray:"2F4F4F",darkturquoise:"00CED1",darkviolet:"9400D3",deeppink:"FF1493",deepskyblue:"00BFFF",dimgray:"696969",dodgerblue:"1E90FF",firebrick:"B22222",floralwhite:"FFFAF0",
|
|
||||||
forestgreen:"228B22",fuchsia:"F0F",gainsboro:"DCDCDC",ghostwhite:"F8F8FF",gold:"FFD700",goldenrod:"DAA520",gray:"808080",green:"008000",greenyellow:"ADFF2F",honeydew:"F0FFF0",hotpink:"FF69B4",indianred:"CD5C5C",indigo:"4B0082",ivory:"FFFFF0",khaki:"F0E68C",lavender:"E6E6FA",lavenderblush:"FFF0F5",lawngreen:"7CFC00",lemonchiffon:"FFFACD",lightblue:"ADD8E6",lightcoral:"F08080",lightcyan:"E0FFFF",lightgoldenrodyellow:"FAFAD2",lightgreen:"90EE90",lightgrey:"D3D3D3",lightpink:"FFB6C1",lightsalmon:"FFA07A",
|
|
||||||
lightseagreen:"20B2AA",lightskyblue:"87CEFA",lightslategray:"789",lightsteelblue:"B0C4DE",lightyellow:"FFFFE0",lime:"0F0",limegreen:"32CD32",linen:"FAF0E6",magenta:"F0F",maroon:"800000",mediumauqamarine:"66CDAA",mediumblue:"0000CD",mediumorchid:"BA55D3",mediumpurple:"9370D8",mediumseagreen:"3CB371",mediumslateblue:"7B68EE",mediumspringgreen:"00FA9A",mediumturquoise:"48D1CC",mediumvioletred:"C71585",midnightblue:"191970",mintcream:"F5FFFA",mistyrose:"FFE4E1",moccasin:"FFE4B5",navajowhite:"FFDEAD",
|
|
||||||
navy:"000080",oldlace:"FDF5E6",olive:"808000",olivedrab:"688E23",orange:"FFA500",orangered:"FF4500",orchid:"DA70D6",palegoldenrod:"EEE8AA",palegreen:"98FB98",paleturquoise:"AFEEEE",palevioletred:"D87093",papayawhip:"FFEFD5",peachpuff:"FFDAB9",peru:"CD853F",pink:"FFC0CB",plum:"DDA0DD",powderblue:"B0E0E6",purple:"800080",red:"F00",rosybrown:"BC8F8F",royalblue:"4169E1",saddlebrown:"8B4513",salmon:"FA8072",sandybrown:"F4A460",seagreen:"2E8B57",seashell:"FFF5EE",sienna:"A0522D",silver:"C0C0C0",skyblue:"87CEEB",
|
|
||||||
slateblue:"6A5ACD",slategray:"708090",snow:"FFFAFA",springgreen:"00FF7F",steelblue:"4682B4",tan:"D2B48C",teal:"008080",thistle:"D8BFD8",tomato:"FF6347",turquoise:"40E0D0",violet:"EE82EE",wheat:"F5DEB3",white:"FFF",whitesmoke:"F5F5F5",yellow:"FF0",yellowgreen:"9ACD32"};a.prototype={parse:function(){if(!this.Ua){var c=this.Y,d;if(d=c.match(a.Qd)){this.Ua="rgb("+d[1]+","+d[2]+","+d[3]+")";this.Yb=parseFloat(d[4])}else{if((d=c.toLowerCase())in a.Fb)c="#"+a.Fb[d];this.Ua=c;this.Yb=c==="transparent"?0:
|
|
||||||
1}}},U:function(c){this.parse();return this.Ua==="currentColor"?c.currentStyle.color:this.Ua},fa:function(){this.parse();return this.Yb}};f.ha=function(c){return b[c]||(b[c]=new a(c))};return a}();f.v=function(){function a(c){this.$a=c;this.ch=0;this.X=[];this.Ga=0}var b=a.qa={Ia:1,Wb:2,z:4,Lc:8,Xb:16,na:32,K:64,oa:128,pa:256,Ra:512,Tc:1024,URL:2048};a.ob=function(c,d){this.k=c;this.d=d};a.ob.prototype={Ca:function(){return this.k&b.K||this.k&b.oa&&this.d==="0"},W:function(){return this.Ca()||this.k&
|
|
||||||
b.Ra}};a.prototype={de:/\s/,Kd:/^[\+\-]?(\d*\.)?\d+/,url:/^url\(\s*("([^"]*)"|'([^']*)'|([!#$%&*-~]*))\s*\)/i,nc:/^\-?[_a-z][\w-]*/i,Yd:/^("([^"]*)"|'([^']*)')/,Bd:/^#([\da-f]{6}|[\da-f]{3})/i,be:{px:b.K,em:b.K,ex:b.K,mm:b.K,cm:b.K,"in":b.K,pt:b.K,pc:b.K,deg:b.Ia,rad:b.Ia,grad:b.Ia},fd:{rgb:1,rgba:1,hsl:1,hsla:1},next:function(c){function d(p,r){p=new a.ob(p,r);if(!c){k.X.push(p);k.Ga++}return p}function e(){k.Ga++;return null}var g,j,i,h,k=this;if(this.Ga<this.X.length)return this.X[this.Ga++];for(;this.de.test(this.$a.charAt(this.ch));)this.ch++;
|
|
||||||
if(this.ch>=this.$a.length)return e();j=this.ch;g=this.$a.substring(this.ch);i=g.charAt(0);switch(i){case "#":if(h=g.match(this.Bd)){this.ch+=h[0].length;return d(b.z,h[0])}break;case '"':case "'":if(h=g.match(this.Yd)){this.ch+=h[0].length;return d(b.Tc,h[2]||h[3]||"")}break;case "/":case ",":this.ch++;return d(b.pa,i);case "u":if(h=g.match(this.url)){this.ch+=h[0].length;return d(b.URL,h[2]||h[3]||h[4]||"")}}if(h=g.match(this.Kd)){i=h[0];this.ch+=i.length;if(g.charAt(i.length)==="%"){this.ch++;
|
|
||||||
return d(b.Ra,i+"%")}if(h=g.substring(i.length).match(this.nc)){i+=h[0];this.ch+=h[0].length;return d(this.be[h[0].toLowerCase()]||b.Lc,i)}return d(b.oa,i)}if(h=g.match(this.nc)){i=h[0];this.ch+=i.length;if(i.toLowerCase()in f.Jc.Fb||i==="currentColor"||i==="transparent")return d(b.z,i);if(g.charAt(i.length)==="("){this.ch++;if(i.toLowerCase()in this.fd){g=function(p){return p&&p.k&b.oa};h=function(p){return p&&p.k&(b.oa|b.Ra)};var n=function(p,r){return p&&p.d===r},m=function(){return k.next(1)};
|
|
||||||
if((i.charAt(0)==="r"?h(m()):g(m()))&&n(m(),",")&&h(m())&&n(m(),",")&&h(m())&&(i==="rgb"||i==="hsa"||n(m(),",")&&g(m()))&&n(m(),")"))return d(b.z,this.$a.substring(j,this.ch));return e()}return d(b.Xb,i)}return d(b.na,i)}this.ch++;return d(b.Wb,i)},D:function(){return this.X[this.Ga-- -2]},all:function(){for(;this.next(););return this.X},ma:function(c,d){for(var e=[],g,j;g=this.next();){if(c(g)){j=true;this.D();break}e.push(g)}return d&&!j?null:e}};return a}();var ha=function(a){this.e=a};ha.prototype=
|
|
||||||
{Z:0,Od:function(){var a=this.qb,b;return!a||(b=this.o())&&(a.x!==b.x||a.y!==b.y)},Td:function(){var a=this.qb,b;return!a||(b=this.o())&&(a.h!==b.h||a.f!==b.f)},hc:function(){var a=this.e,b=a.getBoundingClientRect(),c=f.ja===9,d=f.O===7,e=b.right-b.left;return{x:b.left,y:b.top,h:c||d?a.offsetWidth:e,f:c||d?a.offsetHeight:b.bottom-b.top,Hd:d&&e?a.offsetWidth/e:1}},o:function(){return this.Z?this.Va||(this.Va=this.hc()):this.hc()},Ad:function(){return!!this.qb},cb:function(){++this.Z},hb:function(){if(!--this.Z){if(this.Va)this.qb=
|
|
||||||
this.Va;this.Va=null}}};(function(){function a(b){var c=f.p.Ba(b);return function(){if(this.Z){var d=this.$b||(this.$b={});return c in d?d[c]:(d[c]=b.call(this))}else return b.call(this)}}f.B={Z:0,ka:function(b){function c(d){this.e=d;this.Zb=this.ia()}f.p.Eb(c.prototype,f.B,b);c.$c={};return c},j:function(){var b=this.ia(),c=this.constructor.$c;return b?b in c?c[b]:(c[b]=this.la(b)):null},ia:a(function(){var b=this.e,c=this.constructor,d=b.style;b=b.currentStyle;var e=this.wa,g=this.Fa,j=c.Yc||(c.Yc=
|
|
||||||
f.F+e);c=c.Zc||(c.Zc=f.nb+g.charAt(0).toUpperCase()+g.substring(1));return d[c]||b.getAttribute(j)||d[g]||b.getAttribute(e)}),i:a(function(){return!!this.j()}),H:a(function(){var b=this.ia(),c=b!==this.Zb;this.Zb=b;return c}),va:a,cb:function(){++this.Z},hb:function(){--this.Z||delete this.$b}}})();f.Sb=f.B.ka({wa:f.F+"background",Fa:f.nb+"Background",cd:{scroll:1,fixed:1,local:1},fb:{"repeat-x":1,"repeat-y":1,repeat:1,"no-repeat":1},sc:{"padding-box":1,"border-box":1,"content-box":1},Pd:{top:1,right:1,
|
|
||||||
bottom:1,left:1,center:1},Ud:{contain:1,cover:1},eb:{Ma:"backgroundClip",z:"backgroundColor",da:"backgroundImage",Pa:"backgroundOrigin",S:"backgroundPosition",T:"backgroundRepeat",Sa:"backgroundSize"},la:function(a){function b(s){return s&&s.W()||s.k&k&&s.d in t}function c(s){return s&&(s.W()&&f.n(s.d)||s.d==="auto"&&"auto")}var d=this.e.currentStyle,e,g,j,i=f.v.qa,h=i.pa,k=i.na,n=i.z,m,p,r=0,t=this.Pd,v,l,q={M:[]};if(this.wb()){e=new f.v(a);for(j={};g=e.next();){m=g.k;p=g.d;if(!j.P&&m&i.Xb&&p===
|
|
||||||
"linear-gradient"){v={ca:[],P:p};for(l={};g=e.next();){m=g.k;p=g.d;if(m&i.Wb&&p===")"){l.color&&v.ca.push(l);v.ca.length>1&&f.p.Eb(j,v);break}if(m&n){if(v.sa||v.zb){g=e.D();if(g.k!==h)break;e.next()}l={color:f.ha(p)};g=e.next();if(g.W())l.db=f.n(g.d);else e.D()}else if(m&i.Ia&&!v.sa&&!l.color&&!v.ca.length)v.sa=new f.Ec(g.d);else if(b(g)&&!v.zb&&!l.color&&!v.ca.length){e.D();v.zb=new f.Ja(e.ma(function(s){return!b(s)},false))}else if(m&h&&p===","){if(l.color){v.ca.push(l);l={}}}else break}}else if(!j.P&&
|
|
||||||
m&i.URL){j.Ab=p;j.P="image"}else if(b(g)&&!j.$){e.D();j.$=new f.Ja(e.ma(function(s){return!b(s)},false))}else if(m&k)if(p in this.fb&&!j.bb)j.bb=p;else if(p in this.sc&&!j.Wa){j.Wa=p;if((g=e.next())&&g.k&k&&g.d in this.sc)j.ub=g.d;else{j.ub=p;e.D()}}else if(p in this.cd&&!j.bc)j.bc=p;else return null;else if(m&n&&!q.color)q.color=f.ha(p);else if(m&h&&p==="/"&&!j.Xa&&j.$){g=e.next();if(g.k&k&&g.d in this.Ud)j.Xa=new f.Ka(g.d);else if(g=c(g)){m=c(e.next());if(!m){m=g;e.D()}j.Xa=new f.Ka(g,m)}else return null}else if(m&
|
|
||||||
h&&p===","&&j.P){j.Hb=a.substring(r,e.ch-1);r=e.ch;q.M.push(j);j={}}else return null}if(j.P){j.Hb=a.substring(r);q.M.push(j)}}else this.Bc(f.ja<9?function(){var s=this.eb,o=d[s.S+"X"],u=d[s.S+"Y"],x=d[s.da],y=d[s.z];if(y!=="transparent")q.color=f.ha(y);if(x!=="none")q.M=[{P:"image",Ab:(new f.v(x)).next().d,bb:d[s.T],$:new f.Ja((new f.v(o+" "+u)).all())}]}:function(){var s=this.eb,o=/\s*,\s*/,u=d[s.da].split(o),x=d[s.z],y,z,B,E,D,C;if(x!=="transparent")q.color=f.ha(x);if((E=u.length)&&u[0]!=="none"){x=
|
|
||||||
d[s.T].split(o);y=d[s.S].split(o);z=d[s.Pa].split(o);B=d[s.Ma].split(o);s=d[s.Sa].split(o);q.M=[];for(o=0;o<E;o++)if((D=u[o])&&D!=="none"){C=s[o].split(" ");q.M.push({Hb:D+" "+x[o]+" "+y[o]+" / "+s[o]+" "+z[o]+" "+B[o],P:"image",Ab:(new f.v(D)).next().d,bb:x[o],$:new f.Ja((new f.v(y[o])).all()),Wa:z[o],ub:B[o],Xa:new f.Ka(C[0],C[1])})}}});return q.color||q.M[0]?q:null},Bc:function(a){var b=f.ja>8,c=this.eb,d=this.e.runtimeStyle,e=d[c.da],g=d[c.z],j=d[c.T],i,h,k,n;if(e)d[c.da]="";if(g)d[c.z]="";if(j)d[c.T]=
|
|
||||||
"";if(b){i=d[c.Ma];h=d[c.Pa];n=d[c.S];k=d[c.Sa];if(i)d[c.Ma]="";if(h)d[c.Pa]="";if(n)d[c.S]="";if(k)d[c.Sa]=""}a=a.call(this);if(e)d[c.da]=e;if(g)d[c.z]=g;if(j)d[c.T]=j;if(b){if(i)d[c.Ma]=i;if(h)d[c.Pa]=h;if(n)d[c.S]=n;if(k)d[c.Sa]=k}return a},ia:f.B.va(function(){return this.wb()||this.Bc(function(){var a=this.e.currentStyle,b=this.eb;return a[b.z]+" "+a[b.da]+" "+a[b.T]+" "+a[b.S+"X"]+" "+a[b.S+"Y"]})}),wb:f.B.va(function(){var a=this.e;return a.style[this.Fa]||a.currentStyle.getAttribute(this.wa)}),
|
|
||||||
qc:function(){var a=0;if(f.O<7){a=this.e;a=""+(a.style[f.nb+"PngFix"]||a.currentStyle.getAttribute(f.F+"png-fix"))==="true"}return a},i:f.B.va(function(){return(this.wb()||this.qc())&&!!this.j()})});f.Vb=f.B.ka({wc:["Top","Right","Bottom","Left"],Id:{thin:"1px",medium:"3px",thick:"5px"},la:function(){var a={},b={},c={},d=false,e=true,g=true,j=true;this.Cc(function(){for(var i=this.e.currentStyle,h=0,k,n,m,p,r,t,v;h<4;h++){m=this.wc[h];v=m.charAt(0).toLowerCase();k=b[v]=i["border"+m+"Style"];n=i["border"+
|
|
||||||
m+"Color"];m=i["border"+m+"Width"];if(h>0){if(k!==p)g=false;if(n!==r)e=false;if(m!==t)j=false}p=k;r=n;t=m;c[v]=f.ha(n);m=a[v]=f.n(b[v]==="none"?"0":this.Id[m]||m);if(m.a(this.e)>0)d=true}});return d?{J:a,Zd:b,gd:c,ee:j,hd:e,$d:g}:null},ia:f.B.va(function(){var a=this.e,b=a.currentStyle,c;a.tagName in f.Ac&&a.offsetParent.currentStyle.borderCollapse==="collapse"||this.Cc(function(){c=b.borderWidth+"|"+b.borderStyle+"|"+b.borderColor});return c}),Cc:function(a){var b=this.e.runtimeStyle,c=b.borderWidth,
|
|
||||||
d=b.borderColor;if(c)b.borderWidth="";if(d)b.borderColor="";a=a.call(this);if(c)b.borderWidth=c;if(d)b.borderColor=d;return a}});(function(){f.jb=f.B.ka({wa:"border-radius",Fa:"borderRadius",la:function(b){var c=null,d,e,g,j,i=false;if(b){e=new f.v(b);var h=function(){for(var k=[],n;(g=e.next())&&g.W();){j=f.n(g.d);n=j.ic();if(n<0)return null;if(n>0)i=true;k.push(j)}return k.length>0&&k.length<5?{tl:k[0],tr:k[1]||k[0],br:k[2]||k[0],bl:k[3]||k[1]||k[0]}:null};if(b=h()){if(g){if(g.k&f.v.qa.pa&&g.d===
|
|
||||||
"/")d=h()}else d=b;if(i&&b&&d)c={x:b,y:d}}}return c}});var a=f.n("0");a={tl:a,tr:a,br:a,bl:a};f.jb.Dc={x:a,y:a}})();f.Ub=f.B.ka({wa:"border-image",Fa:"borderImage",fb:{stretch:1,round:1,repeat:1,space:1},la:function(a){var b=null,c,d,e,g,j,i,h=0,k=f.v.qa,n=k.na,m=k.oa,p=k.Ra;if(a){c=new f.v(a);b={};for(var r=function(l){return l&&l.k&k.pa&&l.d==="/"},t=function(l){return l&&l.k&n&&l.d==="fill"},v=function(){g=c.ma(function(l){return!(l.k&(m|p))});if(t(c.next())&&!b.fill)b.fill=true;else c.D();if(r(c.next())){h++;
|
|
||||||
j=c.ma(function(l){return!l.W()&&!(l.k&n&&l.d==="auto")});if(r(c.next())){h++;i=c.ma(function(l){return!l.Ca()})}}else c.D()};a=c.next();){d=a.k;e=a.d;if(d&(m|p)&&!g){c.D();v()}else if(t(a)&&!b.fill){b.fill=true;v()}else if(d&n&&this.fb[e]&&!b.repeat){b.repeat={f:e};if(a=c.next())if(a.k&n&&this.fb[a.d])b.repeat.Ob=a.d;else c.D()}else if(d&k.URL&&!b.src)b.src=e;else return null}if(!b.src||!g||g.length<1||g.length>4||j&&j.length>4||h===1&&j.length<1||i&&i.length>4||h===2&&i.length<1)return null;if(!b.repeat)b.repeat=
|
|
||||||
{f:"stretch"};if(!b.repeat.Ob)b.repeat.Ob=b.repeat.f;a=function(l,q){return{t:q(l[0]),r:q(l[1]||l[0]),b:q(l[2]||l[0]),l:q(l[3]||l[1]||l[0])}};b.slice=a(g,function(l){return f.n(l.k&m?l.d+"px":l.d)});if(j&&j[0])b.J=a(j,function(l){return l.W()?f.n(l.d):l.d});if(i&&i[0])b.Da=a(i,function(l){return l.Ca()?f.n(l.d):l.d})}return b}});f.Ic=f.B.ka({wa:"box-shadow",Fa:"boxShadow",la:function(a){var b,c=f.n,d=f.v.qa,e;if(a){e=new f.v(a);b={Da:[],Bb:[]};for(a=function(){for(var g,j,i,h,k,n;g=e.next();){i=g.d;
|
|
||||||
j=g.k;if(j&d.pa&&i===",")break;else if(g.Ca()&&!k){e.D();k=e.ma(function(m){return!m.Ca()})}else if(j&d.z&&!h)h=i;else if(j&d.na&&i==="inset"&&!n)n=true;else return false}g=k&&k.length;if(g>1&&g<5){(n?b.Bb:b.Da).push({fe:c(k[0].d),ge:c(k[1].d),blur:c(k[2]?k[2].d:"0"),Vd:c(k[3]?k[3].d:"0"),color:f.ha(h||"currentColor")});return true}return false};a(););}return b&&(b.Bb.length||b.Da.length)?b:null}});f.Uc=f.B.ka({ia:f.B.va(function(){var a=this.e.currentStyle;return a.visibility+"|"+a.display}),la:function(){var a=
|
|
||||||
this.e,b=a.runtimeStyle;a=a.currentStyle;var c=b.visibility,d;b.visibility="";d=a.visibility;b.visibility=c;return{ce:d!=="hidden",nd:a.display!=="none"}},i:function(){return false}});f.u={R:function(a){function b(c,d,e,g){this.e=c;this.s=d;this.g=e;this.parent=g}f.p.Eb(b.prototype,f.u,a);return b},Cb:false,Q:function(){return false},Ea:f.aa,Lb:function(){this.m();this.i()&&this.V()},ib:function(){this.Cb=true},Mb:function(){this.i()?this.V():this.m()},sb:function(a,b){this.vc(a);for(var c=this.ra||
|
|
||||||
(this.ra=[]),d=a+1,e=c.length,g;d<e;d++)if(g=c[d])break;c[a]=b;this.I().insertBefore(b,g||null)},za:function(a){var b=this.ra;return b&&b[a]||null},vc:function(a){var b=this.za(a),c=this.Ta;if(b&&c){c.removeChild(b);this.ra[a]=null}},Aa:function(a,b,c,d){var e=this.rb||(this.rb={}),g=e[a];if(!g){g=e[a]=f.p.Za("shape");if(b)g.appendChild(g[b]=f.p.Za(b));if(d){c=this.za(d);if(!c){this.sb(d,doc.createElement("group"+d));c=this.za(d)}}c.appendChild(g);a=g.style;a.position="absolute";a.left=a.top=0;a.behavior=
|
|
||||||
"url(#default#VML)"}return g},vb:function(a){var b=this.rb,c=b&&b[a];if(c){c.parentNode.removeChild(c);delete b[a]}return!!c},kc:function(a){var b=this.e,c=this.s.o(),d=c.h,e=c.f,g,j,i,h,k,n;c=a.x.tl.a(b,d);g=a.y.tl.a(b,e);j=a.x.tr.a(b,d);i=a.y.tr.a(b,e);h=a.x.br.a(b,d);k=a.y.br.a(b,e);n=a.x.bl.a(b,d);a=a.y.bl.a(b,e);d=Math.min(d/(c+j),e/(i+k),d/(n+h),e/(g+a));if(d<1){c*=d;g*=d;j*=d;i*=d;h*=d;k*=d;n*=d;a*=d}return{x:{tl:c,tr:j,br:h,bl:n},y:{tl:g,tr:i,br:k,bl:a}}},ya:function(a,b,c){b=b||1;var d,e,
|
|
||||||
g=this.s.o();e=g.h*b;g=g.f*b;var j=this.g.G,i=Math.floor,h=Math.ceil,k=a?a.Jb*b:0,n=a?a.Ib*b:0,m=a?a.tb*b:0;a=a?a.Db*b:0;var p,r,t,v,l;if(c||j.i()){d=this.kc(c||j.j());c=d.x.tl*b;j=d.y.tl*b;p=d.x.tr*b;r=d.y.tr*b;t=d.x.br*b;v=d.y.br*b;l=d.x.bl*b;b=d.y.bl*b;e="m"+i(a)+","+i(j)+"qy"+i(c)+","+i(k)+"l"+h(e-p)+","+i(k)+"qx"+h(e-n)+","+i(r)+"l"+h(e-n)+","+h(g-v)+"qy"+h(e-t)+","+h(g-m)+"l"+i(l)+","+h(g-m)+"qx"+i(a)+","+h(g-b)+" x e"}else e="m"+i(a)+","+i(k)+"l"+h(e-n)+","+i(k)+"l"+h(e-n)+","+h(g-m)+"l"+i(a)+
|
|
||||||
","+h(g-m)+"xe";return e},I:function(){var a=this.parent.za(this.N),b;if(!a){a=doc.createElement(this.Ya);b=a.style;b.position="absolute";b.top=b.left=0;this.parent.sb(this.N,a)}return a},mc:function(){var a=this.e,b=a.currentStyle,c=a.runtimeStyle,d=a.tagName,e=f.O===6,g;if(e&&(d in f.cc||d==="FIELDSET")||d==="BUTTON"||d==="INPUT"&&a.type in f.Gd){c.borderWidth="";d=this.g.w.wc;for(g=d.length;g--;){e=d[g];c["padding"+e]="";c["padding"+e]=f.n(b["padding"+e]).a(a)+f.n(b["border"+e+"Width"]).a(a)+(f.O!==
|
|
||||||
8&&g%2?1:0)}c.borderWidth=0}else if(e){if(a.childNodes.length!==1||a.firstChild.tagName!=="ie6-mask"){b=doc.createElement("ie6-mask");d=b.style;d.visibility="visible";for(d.zoom=1;d=a.firstChild;)b.appendChild(d);a.appendChild(b);c.visibility="hidden"}}else c.borderColor="transparent"},ie:function(){},m:function(){this.parent.vc(this.N);delete this.rb;delete this.ra}};f.Rc=f.u.R({i:function(){var a=this.ed;for(var b in a)if(a.hasOwnProperty(b)&&a[b].i())return true;return false},Q:function(){return this.g.Pb.H()},
|
|
||||||
ib:function(){if(this.i()){var a=this.jc(),b=a,c;a=a.currentStyle;var d=a.position,e=this.I().style,g=0,j=0;j=this.s.o();var i=j.Hd;if(d==="fixed"&&f.O>6){g=j.x*i;j=j.y*i;b=d}else{do b=b.offsetParent;while(b&&b.currentStyle.position==="static");if(b){c=b.getBoundingClientRect();b=b.currentStyle;g=(j.x-c.left)*i-(parseFloat(b.borderLeftWidth)||0);j=(j.y-c.top)*i-(parseFloat(b.borderTopWidth)||0)}else{b=doc.documentElement;g=(j.x+b.scrollLeft-b.clientLeft)*i;j=(j.y+b.scrollTop-b.clientTop)*i}b="absolute"}e.position=
|
|
||||||
b;e.left=g;e.top=j;e.zIndex=d==="static"?-1:a.zIndex;this.Cb=true}},Mb:f.aa,Nb:function(){var a=this.g.Pb.j();this.I().style.display=a.ce&&a.nd?"":"none"},Lb:function(){this.i()?this.Nb():this.m()},jc:function(){var a=this.e;return a.tagName in f.Ac?a.offsetParent:a},I:function(){var a=this.Ta,b;if(!a){b=this.jc();a=this.Ta=doc.createElement("css3-container");a.style.direction="ltr";this.Nb();b.parentNode.insertBefore(a,b)}return a},ab:f.aa,m:function(){var a=this.Ta,b;if(a&&(b=a.parentNode))b.removeChild(a);
|
|
||||||
delete this.Ta;delete this.ra}});f.Fc=f.u.R({N:2,Ya:"background",Q:function(){var a=this.g;return a.C.H()||a.G.H()},i:function(){var a=this.g;return a.q.i()||a.G.i()||a.C.i()||a.ga.i()&&a.ga.j().Bb},V:function(){var a=this.s.o();if(a.h&&a.f){this.od();this.pd()}},od:function(){var a=this.g.C.j(),b=this.s.o(),c=this.e,d=a&&a.color,e,g;if(d&&d.fa()>0){this.lc();a=this.Aa("bgColor","fill",this.I(),1);e=b.h;b=b.f;a.stroked=false;a.coordsize=e*2+","+b*2;a.coordorigin="1,1";a.path=this.ya(null,2);g=a.style;
|
|
||||||
g.width=e;g.height=b;a.fill.color=d.U(c);c=d.fa();if(c<1)a.fill.opacity=c}else this.vb("bgColor")},pd:function(){var a=this.g.C.j(),b=this.s.o();a=a&&a.M;var c,d,e,g,j;if(a){this.lc();d=b.h;e=b.f;for(j=a.length;j--;){b=a[j];c=this.Aa("bgImage"+j,"fill",this.I(),2);c.stroked=false;c.fill.type="tile";c.fillcolor="none";c.coordsize=d*2+","+e*2;c.coordorigin="1,1";c.path=this.ya(0,2);g=c.style;g.width=d;g.height=e;if(b.P==="linear-gradient")this.bd(c,b);else{c.fill.src=b.Ab;this.Nd(c,j)}}}for(j=a?a.length:
|
|
||||||
0;this.vb("bgImage"+j++););},Nd:function(a,b){var c=this;f.p.Rb(a.fill.src,function(d){var e=c.e,g=c.s.o(),j=g.h;g=g.f;if(j&&g){var i=a.fill,h=c.g,k=h.w.j(),n=k&&k.J;k=n?n.t.a(e):0;var m=n?n.r.a(e):0,p=n?n.b.a(e):0;n=n?n.l.a(e):0;h=h.C.j().M[b];e=h.$?h.$.coords(e,j-d.h-n-m,g-d.f-k-p):{x:0,y:0};h=h.bb;p=m=0;var r=j+1,t=g+1,v=f.O===8?0:1;n=Math.round(e.x)+n+0.5;k=Math.round(e.y)+k+0.5;i.position=n/j+","+k/g;i.size.x=1;i.size=d.h+"px,"+d.f+"px";if(h&&h!=="repeat"){if(h==="repeat-x"||h==="no-repeat"){m=
|
|
||||||
k+1;t=k+d.f+v}if(h==="repeat-y"||h==="no-repeat"){p=n+1;r=n+d.h+v}a.style.clip="rect("+m+"px,"+r+"px,"+t+"px,"+p+"px)"}}})},bd:function(a,b){var c=this.e,d=this.s.o(),e=d.h,g=d.f;a=a.fill;d=b.ca;var j=d.length,i=Math.PI,h=f.Na,k=h.tc,n=h.dc;b=h.gc(c,e,g,b);h=b.sa;var m=b.xc,p=b.yc,r=b.Wd,t=b.Xd,v=b.rd,l=b.sd,q=b.kd,s=b.ld;b=b.rc;e=h%90?Math.atan2(q*e/g,s)/i*180:h+90;e+=180;e%=360;v=k(r,t,h,v,l);g=n(r,t,v[0],v[1]);i=[];v=k(m,p,h,r,t);n=n(m,p,v[0],v[1])/g*100;k=[];for(h=0;h<j;h++)k.push(d[h].db?d[h].db.a(c,
|
|
||||||
b):h===0?0:h===j-1?b:null);for(h=1;h<j;h++){if(k[h]===null){m=k[h-1];b=h;do p=k[++b];while(p===null);k[h]=m+(p-m)/(b-h+1)}k[h]=Math.max(k[h],k[h-1])}for(h=0;h<j;h++)i.push(n+k[h]/g*100+"% "+d[h].color.U(c));a.angle=e;a.type="gradient";a.method="sigma";a.color=d[0].color.U(c);a.color2=d[j-1].color.U(c);if(a.colors)a.colors.value=i.join(",");else a.colors=i.join(",")},lc:function(){var a=this.e.runtimeStyle;a.backgroundImage="url(about:blank)";a.backgroundColor="transparent"},m:function(){f.u.m.call(this);
|
|
||||||
var a=this.e.runtimeStyle;a.backgroundImage=a.backgroundColor=""}});f.Gc=f.u.R({N:4,Ya:"border",Q:function(){var a=this.g;return a.w.H()||a.G.H()},i:function(){var a=this.g;return a.G.i()&&!a.q.i()&&a.w.i()},V:function(){var a=this.e,b=this.g.w.j(),c=this.s.o(),d=c.h;c=c.f;var e,g,j,i,h;if(b){this.mc();b=this.wd(2);i=0;for(h=b.length;i<h;i++){j=b[i];e=this.Aa("borderPiece"+i,j.stroke?"stroke":"fill",this.I());e.coordsize=d*2+","+c*2;e.coordorigin="1,1";e.path=j.path;g=e.style;g.width=d;g.height=c;
|
|
||||||
e.filled=!!j.fill;e.stroked=!!j.stroke;if(j.stroke){e=e.stroke;e.weight=j.Qb+"px";e.color=j.color.U(a);e.dashstyle=j.stroke==="dashed"?"2 2":j.stroke==="dotted"?"1 1":"solid";e.linestyle=j.stroke==="double"&&j.Qb>2?"ThinThin":"Single"}else e.fill.color=j.fill.U(a)}for(;this.vb("borderPiece"+i++););}},wd:function(a){var b=this.e,c,d,e,g=this.g.w,j=[],i,h,k,n,m=Math.round,p,r,t;if(g.i()){c=g.j();g=c.J;r=c.Zd;t=c.gd;if(c.ee&&c.$d&&c.hd){if(t.t.fa()>0){c=g.t.a(b);k=c/2;j.push({path:this.ya({Jb:k,Ib:k,
|
|
||||||
tb:k,Db:k},a),stroke:r.t,color:t.t,Qb:c})}}else{a=a||1;c=this.s.o();d=c.h;e=c.f;c=m(g.t.a(b));k=m(g.r.a(b));n=m(g.b.a(b));b=m(g.l.a(b));var v={t:c,r:k,b:n,l:b};b=this.g.G;if(b.i())p=this.kc(b.j());i=Math.floor;h=Math.ceil;var l=function(o,u){return p?p[o][u]:0},q=function(o,u,x,y,z,B){var E=l("x",o),D=l("y",o),C=o.charAt(1)==="r";o=o.charAt(0)==="b";return E>0&&D>0?(B?"al":"ae")+(C?h(d-E):i(E))*a+","+(o?h(e-D):i(D))*a+","+(i(E)-u)*a+","+(i(D)-x)*a+","+y*65535+","+2949075*(z?1:-1):(B?"m":"l")+(C?d-
|
|
||||||
u:u)*a+","+(o?e-x:x)*a},s=function(o,u,x,y){var z=o==="t"?i(l("x","tl"))*a+","+h(u)*a:o==="r"?h(d-u)*a+","+i(l("y","tr"))*a:o==="b"?h(d-l("x","br"))*a+","+i(e-u)*a:i(u)*a+","+h(e-l("y","bl"))*a;o=o==="t"?h(d-l("x","tr"))*a+","+h(u)*a:o==="r"?h(d-u)*a+","+h(e-l("y","br"))*a:o==="b"?i(l("x","bl"))*a+","+i(e-u)*a:i(u)*a+","+i(l("y","tl"))*a;return x?(y?"m"+o:"")+"l"+z:(y?"m"+z:"")+"l"+o};b=function(o,u,x,y,z,B){var E=o==="l"||o==="r",D=v[o],C,F;if(D>0&&r[o]!=="none"&&t[o].fa()>0){C=v[E?o:u];u=v[E?u:
|
|
||||||
o];F=v[E?o:x];x=v[E?x:o];if(r[o]==="dashed"||r[o]==="dotted"){j.push({path:q(y,C,u,B+45,0,1)+q(y,0,0,B,1,0),fill:t[o]});j.push({path:s(o,D/2,0,1),stroke:r[o],Qb:D,color:t[o]});j.push({path:q(z,F,x,B,0,1)+q(z,0,0,B-45,1,0),fill:t[o]})}else j.push({path:q(y,C,u,B+45,0,1)+s(o,D,0,0)+q(z,F,x,B,0,0)+(r[o]==="double"&&D>2?q(z,F-i(F/3),x-i(x/3),B-45,1,0)+s(o,h(D/3*2),1,0)+q(y,C-i(C/3),u-i(u/3),B,1,0)+"x "+q(y,i(C/3),i(u/3),B+45,0,1)+s(o,i(D/3),1,0)+q(z,i(F/3),i(x/3),B,0,0):"")+q(z,0,0,B-45,1,0)+s(o,0,1,
|
|
||||||
0)+q(y,0,0,B,1,0),fill:t[o]})}};b("t","l","r","tl","tr",90);b("r","t","b","tr","br",0);b("b","r","l","br","bl",-90);b("l","b","t","bl","tl",-180)}}return j},m:function(){if(this.ec||!this.g.q.i())this.e.runtimeStyle.borderColor="";f.u.m.call(this)}});f.Tb=f.u.R({N:5,Md:["t","tr","r","br","b","bl","l","tl","c"],Q:function(){return this.g.q.H()},i:function(){return this.g.q.i()},V:function(){this.I();var a=this.g.q.j(),b=this.g.w.j(),c=this.s.o(),d=this.e,e=this.uc;f.p.Rb(a.src,function(g){function j(s,
|
|
||||||
o,u,x,y){s=e[s].style;var z=Math.max;s.width=z(o,0);s.height=z(u,0);s.left=x;s.top=y}function i(s,o,u){for(var x=0,y=s.length;x<y;x++)e[s[x]].imagedata[o]=u}var h=c.h,k=c.f,n=f.n("0"),m=a.J||(b?b.J:{t:n,r:n,b:n,l:n});n=m.t.a(d);var p=m.r.a(d),r=m.b.a(d);m=m.l.a(d);var t=a.slice,v=t.t.a(d),l=t.r.a(d),q=t.b.a(d);t=t.l.a(d);j("tl",m,n,0,0);j("t",h-m-p,n,m,0);j("tr",p,n,h-p,0);j("r",p,k-n-r,h-p,n);j("br",p,r,h-p,k-r);j("b",h-m-p,r,m,k-r);j("bl",m,r,0,k-r);j("l",m,k-n-r,0,n);j("c",h-m-p,k-n-r,m,n);i(["tl",
|
|
||||||
"t","tr"],"cropBottom",(g.f-v)/g.f);i(["tl","l","bl"],"cropRight",(g.h-t)/g.h);i(["bl","b","br"],"cropTop",(g.f-q)/g.f);i(["tr","r","br"],"cropLeft",(g.h-l)/g.h);i(["l","r","c"],"cropTop",v/g.f);i(["l","r","c"],"cropBottom",q/g.f);i(["t","b","c"],"cropLeft",t/g.h);i(["t","b","c"],"cropRight",l/g.h);e.c.style.display=a.fill?"":"none"},this)},I:function(){var a=this.parent.za(this.N),b,c,d,e=this.Md,g=e.length;if(!a){a=doc.createElement("border-image");b=a.style;b.position="absolute";this.uc={};for(d=
|
|
||||||
0;d<g;d++){c=this.uc[e[d]]=f.p.Za("rect");c.appendChild(f.p.Za("imagedata"));b=c.style;b.behavior="url(#default#VML)";b.position="absolute";b.top=b.left=0;c.imagedata.src=this.g.q.j().src;c.stroked=false;c.filled=false;a.appendChild(c)}this.parent.sb(this.N,a)}return a},Ea:function(){if(this.i()){var a=this.e,b=a.runtimeStyle,c=this.g.q.j().J;b.borderStyle="solid";if(c){b.borderTopWidth=c.t.a(a)+"px";b.borderRightWidth=c.r.a(a)+"px";b.borderBottomWidth=c.b.a(a)+"px";b.borderLeftWidth=c.l.a(a)+"px"}this.mc()}},
|
|
||||||
m:function(){var a=this.e.runtimeStyle;a.borderStyle="";if(this.ec||!this.g.w.i())a.borderColor=a.borderWidth="";f.u.m.call(this)}});f.Hc=f.u.R({N:1,Ya:"outset-box-shadow",Q:function(){var a=this.g;return a.ga.H()||a.G.H()},i:function(){var a=this.g.ga;return a.i()&&a.j().Da[0]},V:function(){function a(C,F,O,H,M,P,I){C=b.Aa("shadow"+C+F,"fill",d,j-C);F=C.fill;C.coordsize=n*2+","+m*2;C.coordorigin="1,1";C.stroked=false;C.filled=true;F.color=M.U(c);if(P){F.type="gradienttitle";F.color2=F.color;F.opacity=
|
|
||||||
0}C.path=I;l=C.style;l.left=O;l.top=H;l.width=n;l.height=m;return C}var b=this,c=this.e,d=this.I(),e=this.g,g=e.ga.j().Da;e=e.G.j();var j=g.length,i=j,h,k=this.s.o(),n=k.h,m=k.f;k=f.O===8?1:0;for(var p=["tl","tr","br","bl"],r,t,v,l,q,s,o,u,x,y,z,B,E,D;i--;){t=g[i];q=t.fe.a(c);s=t.ge.a(c);h=t.Vd.a(c);o=t.blur.a(c);t=t.color;u=-h-o;if(!e&&o)e=f.jb.Dc;u=this.ya({Jb:u,Ib:u,tb:u,Db:u},2,e);if(o){x=(h+o)*2+n;y=(h+o)*2+m;z=x?o*2/x:0;B=y?o*2/y:0;if(o-h>n/2||o-h>m/2)for(h=4;h--;){r=p[h];E=r.charAt(0)==="b";
|
|
||||||
D=r.charAt(1)==="r";r=a(i,r,q,s,t,o,u);v=r.fill;v.focusposition=(D?1-z:z)+","+(E?1-B:B);v.focussize="0,0";r.style.clip="rect("+((E?y/2:0)+k)+"px,"+(D?x:x/2)+"px,"+(E?y:y/2)+"px,"+((D?x/2:0)+k)+"px)"}else{r=a(i,"",q,s,t,o,u);v=r.fill;v.focusposition=z+","+B;v.focussize=1-z*2+","+(1-B*2)}}else{r=a(i,"",q,s,t,o,u);q=t.fa();if(q<1)r.fill.opacity=q}}}});f.Pc=f.u.R({N:6,Ya:"imgEl",Q:function(){var a=this.g;return this.e.src!==this.Xc||a.G.H()},i:function(){var a=this.g;return a.G.i()||a.C.qc()},V:function(){this.Xc=
|
|
||||||
j;this.Cd();var a=this.Aa("img","fill",this.I()),b=a.fill,c=this.s.o(),d=c.h;c=c.f;var e=this.g.w.j(),g=e&&e.J;e=this.e;var j=e.src,i=Math.round,h=e.currentStyle,k=f.n;if(!g||f.O<7){g=f.n("0");g={t:g,r:g,b:g,l:g}}a.stroked=false;b.type="frame";b.src=j;b.position=(d?0.5/d:0)+","+(c?0.5/c:0);a.coordsize=d*2+","+c*2;a.coordorigin="1,1";a.path=this.ya({Jb:i(g.t.a(e)+k(h.paddingTop).a(e)),Ib:i(g.r.a(e)+k(h.paddingRight).a(e)),tb:i(g.b.a(e)+k(h.paddingBottom).a(e)),Db:i(g.l.a(e)+k(h.paddingLeft).a(e))},
|
|
||||||
2);a=a.style;a.width=d;a.height=c},Cd:function(){this.e.runtimeStyle.filter="alpha(opacity=0)"},m:function(){f.u.m.call(this);this.e.runtimeStyle.filter=""}});f.Oc=f.u.R({ib:f.aa,Mb:f.aa,Nb:f.aa,Lb:f.aa,Ld:/^,+|,+$/g,Fd:/,+/g,gb:function(a,b){(this.pb||(this.pb=[]))[a]=b||void 0},ab:function(){var a=this.pb,b;if(a&&(b=a.join(",").replace(this.Ld,"").replace(this.Fd,","))!==this.Wc)this.Wc=this.e.runtimeStyle.background=b},m:function(){this.e.runtimeStyle.background="";delete this.pb}});f.Mc=f.u.R({ua:1,
|
|
||||||
Q:function(){return this.g.C.H()},i:function(){var a=this.g;return a.C.i()||a.q.i()},V:function(){var a=this.g.C.j(),b,c,d=0,e,g;if(a){b=[];if(c=a.M)for(;e=c[d++];)if(e.P==="linear-gradient"){g=this.vd(e.Wa);g=(e.Xa||f.Ka.Kc).a(this.e,g.h,g.f,g.h,g.f);b.push("url(data:image/svg+xml,"+escape(this.xd(e,g.h,g.f))+") "+this.dd(e.$)+" / "+g.h+"px "+g.f+"px "+(e.bc||"")+" "+(e.Wa||"")+" "+(e.ub||""))}else b.push(e.Hb);a.color&&b.push(a.color.Y);this.parent.gb(this.ua,b.join(","))}},dd:function(a){return a?
|
|
||||||
a.X.map(function(b){return b.d}).join(" "):"0 0"},vd:function(a){var b=this.e,c=this.s.o(),d=c.h;c=c.f;var e;if(a!=="border-box")if((e=this.g.w.j())&&(e=e.J)){d-=e.l.a(b)+e.l.a(b);c-=e.t.a(b)+e.b.a(b)}if(a==="content-box"){a=f.n;e=b.currentStyle;d-=a(e.paddingLeft).a(b)+a(e.paddingRight).a(b);c-=a(e.paddingTop).a(b)+a(e.paddingBottom).a(b)}return{h:d,f:c}},xd:function(a,b,c){var d=this.e,e=a.ca,g=e.length,j=f.Na.gc(d,b,c,a);a=j.xc;var i=j.yc,h=j.td,k=j.ud;j=j.rc;var n,m,p,r,t;n=[];for(m=0;m<g;m++)n.push(e[m].db?
|
|
||||||
e[m].db.a(d,j):m===0?0:m===g-1?j:null);for(m=1;m<g;m++)if(n[m]===null){r=n[m-1];p=m;do t=n[++p];while(t===null);n[m]=r+(t-r)/(p-m+1)}b=['<svg width="'+b+'" height="'+c+'" xmlns="http://www.w3.org/2000/svg"><defs><linearGradient id="g" gradientUnits="userSpaceOnUse" x1="'+a/b*100+'%" y1="'+i/c*100+'%" x2="'+h/b*100+'%" y2="'+k/c*100+'%">'];for(m=0;m<g;m++)b.push('<stop offset="'+n[m]/j+'" stop-color="'+e[m].color.U(d)+'" stop-opacity="'+e[m].color.fa()+'"/>');b.push('</linearGradient></defs><rect width="100%" height="100%" fill="url(#g)"/></svg>');
|
|
||||||
return b.join("")},m:function(){this.parent.gb(this.ua)}});f.Nc=f.u.R({T:"repeat",Sc:"stretch",Qc:"round",ua:0,Q:function(){return this.g.q.H()},i:function(){return this.g.q.i()},V:function(){var a=this,b=a.g.q.j(),c=a.g.w.j(),d=a.s.o(),e=b.repeat,g=e.f,j=e.Ob,i=a.e,h=0;f.p.Rb(b.src,function(k){function n(Q,R,U,V,W,Y,X,S,w,A){K.push('<pattern patternUnits="userSpaceOnUse" id="pattern'+G+'" x="'+(g===l?Q+U/2-w/2:Q)+'" y="'+(j===l?R+V/2-A/2:R)+'" width="'+w+'" height="'+A+'"><svg width="'+w+'" height="'+
|
|
||||||
A+'" viewBox="'+W+" "+Y+" "+X+" "+S+'" preserveAspectRatio="none"><image xlink:href="'+v+'" x="0" y="0" width="'+r+'" height="'+t+'" /></svg></pattern>');J.push('<rect x="'+Q+'" y="'+R+'" width="'+U+'" height="'+V+'" fill="url(#pattern'+G+')" />');G++}var m=d.h,p=d.f,r=k.h,t=k.f,v=a.Dd(b.src,r,t),l=a.T,q=a.Sc;k=a.Qc;var s=Math.ceil,o=f.n("0"),u=b.J||(c?c.J:{t:o,r:o,b:o,l:o});o=u.t.a(i);var x=u.r.a(i),y=u.b.a(i);u=u.l.a(i);var z=b.slice,B=z.t.a(i),E=z.r.a(i),D=z.b.a(i);z=z.l.a(i);var C=m-u-x,F=p-o-
|
|
||||||
y,O=r-z-E,H=t-B-D,M=g===q?C:O*o/B,P=j===q?F:H*x/E,I=g===q?C:O*y/D;q=j===q?F:H*u/z;var K=[],J=[],G=0;if(g===k){M-=(M-(C%M||M))/s(C/M);I-=(I-(C%I||I))/s(C/I)}if(j===k){P-=(P-(F%P||P))/s(F/P);q-=(q-(F%q||q))/s(F/q)}k=['<svg width="'+m+'" height="'+p+'" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">'];n(0,0,u,o,0,0,z,B,u,o);n(u,0,C,o,z,0,O,B,M,o);n(m-x,0,x,o,r-E,0,E,B,x,o);n(0,o,u,F,0,B,z,H,u,q);if(b.fill)n(u,o,C,F,z,B,O,H,M||I||O,q||P||H);n(m-x,o,x,F,r-E,B,E,H,x,P);n(0,
|
|
||||||
p-y,u,y,0,t-D,z,D,u,y);n(u,p-y,C,y,z,t-D,O,D,I,y);n(m-x,p-y,x,y,r-E,t-D,E,D,x,y);k.push("<defs>"+K.join("\n")+"</defs>"+J.join("\n")+"</svg>");a.parent.gb(a.ua,"url(data:image/svg+xml,"+escape(k.join(""))+") no-repeat border-box border-box");h&&a.parent.ab()},a);h=1},Dd:function(){var a={};return function(b,c,d){var e=a[b],g;if(!e){e=new Image;g=doc.createElement("canvas");e.src=b;g.width=c;g.height=d;g.getContext("2d").drawImage(e,0,0);e=a[b]=g.toDataURL()}return e}}(),Ea:f.Tb.prototype.Ea,m:function(){var a=
|
|
||||||
this.e.runtimeStyle;this.parent.gb(this.ua);a.borderColor=a.borderStyle=a.borderWidth=""}});f.kb=function(){function a(l,q){l.className+=" "+q}function b(l){var q=v.slice.call(arguments,1),s=q.length;setTimeout(function(){if(l)for(;s--;)a(l,q[s])},0)}function c(l){var q=v.slice.call(arguments,1),s=q.length;setTimeout(function(){if(l)for(;s--;){var o=q[s];o=t[o]||(t[o]=new RegExp("\\b"+o+"\\b","g"));l.className=l.className.replace(o,"")}},0)}function d(l){function q(){if(!U){var w,A,L=f.ja,T=l.currentStyle,
|
|
||||||
N=T.getAttribute(g)==="true",da=T.getAttribute(i)!=="false",ea=T.getAttribute(h)!=="false";S=T.getAttribute(j);S=L>7?S!=="false":S==="true";if(!R){R=1;l.runtimeStyle.zoom=1;T=l;for(var fa=1;T=T.previousSibling;)if(T.nodeType===1){fa=0;break}fa&&a(l,p)}J.cb();if(N&&(A=J.o())&&(w=doc.documentElement||doc.body)&&(A.y>w.clientHeight||A.x>w.clientWidth||A.y+A.f<0||A.x+A.h<0)){if(!Y){Y=1;f.mb.ba(q)}}else{U=1;Y=R=0;f.mb.Ha(q);if(L===9){G={C:new f.Sb(l),q:new f.Ub(l),w:new f.Vb(l)};Q=[G.C,G.q];K=new f.Oc(l,
|
|
||||||
J,G);w=[new f.Mc(l,J,G,K),new f.Nc(l,J,G,K)]}else{G={C:new f.Sb(l),w:new f.Vb(l),q:new f.Ub(l),G:new f.jb(l),ga:new f.Ic(l),Pb:new f.Uc(l)};Q=[G.C,G.w,G.q,G.G,G.ga,G.Pb];K=new f.Rc(l,J,G);w=[new f.Hc(l,J,G,K),new f.Fc(l,J,G,K),new f.Gc(l,J,G,K),new f.Tb(l,J,G,K)];l.tagName==="IMG"&&w.push(new f.Pc(l,J,G,K));K.ed=w}I=[K].concat(w);if(w=l.currentStyle.getAttribute(f.F+"watch-ancestors")){w=parseInt(w,10);A=0;for(N=l.parentNode;N&&(w==="NaN"||A++<w);){H(N,"onpropertychange",C);H(N,"onmouseenter",x);
|
|
||||||
H(N,"onmouseleave",y);H(N,"onmousedown",z);if(N.tagName in f.fc){H(N,"onfocus",E);H(N,"onblur",D)}N=N.parentNode}}if(S){f.Oa.ba(o);f.Oa.Rd()}o(1)}if(!V){V=1;L<9&&H(l,"onmove",s);H(l,"onresize",s);H(l,"onpropertychange",u);ea&&H(l,"onmouseenter",x);if(ea||da)H(l,"onmouseleave",y);da&&H(l,"onmousedown",z);if(l.tagName in f.fc){H(l,"onfocus",E);H(l,"onblur",D)}f.Qa.ba(s);f.L.ba(M)}J.hb()}}function s(){J&&J.Ad()&&o()}function o(w){if(!X)if(U){var A,L=I.length;F();for(A=0;A<L;A++)I[A].Ea();if(w||J.Od())for(A=
|
|
||||||
0;A<L;A++)I[A].ib();if(w||J.Td())for(A=0;A<L;A++)I[A].Mb();K.ab();O()}else R||q()}function u(){var w,A=I.length,L;w=event;if(!X&&!(w&&w.propertyName in r))if(U){F();for(w=0;w<A;w++)I[w].Ea();for(w=0;w<A;w++){L=I[w];L.Cb||L.ib();L.Q()&&L.Lb()}K.ab();O()}else R||q()}function x(){b(l,k)}function y(){c(l,k,n)}function z(){b(l,n);f.lb.ba(B)}function B(){c(l,n);f.lb.Ha(B)}function E(){b(l,m)}function D(){c(l,m)}function C(){var w=event.propertyName;if(w==="className"||w==="id")u()}function F(){J.cb();for(var w=
|
|
||||||
Q.length;w--;)Q[w].cb()}function O(){for(var w=Q.length;w--;)Q[w].hb();J.hb()}function H(w,A,L){w.attachEvent(A,L);W.push([w,A,L])}function M(){if(V){for(var w=W.length,A;w--;){A=W[w];A[0].detachEvent(A[1],A[2])}f.L.Ha(M);V=0;W=[]}}function P(){if(!X){var w,A;M();X=1;if(I){w=0;for(A=I.length;w<A;w++){I[w].ec=1;I[w].m()}}S&&f.Oa.Ha(o);f.Qa.Ha(o);I=J=G=Q=l=null}}var I,K,J=new ha(l),G,Q,R,U,V,W=[],Y,X,S;this.Ed=q;this.update=o;this.m=P;this.qd=l}var e={},g=f.F+"lazy-init",j=f.F+"poll",i=f.F+"track-active",
|
|
||||||
h=f.F+"track-hover",k=f.La+"hover",n=f.La+"active",m=f.La+"focus",p=f.La+"first-child",r={background:1,bgColor:1,display:1},t={},v=[];d.yd=function(l){var q=f.p.Ba(l);return e[q]||(e[q]=new d(l))};d.m=function(l){l=f.p.Ba(l);var q=e[l];if(q){q.m();delete e[l]}};d.md=function(){var l=[],q;if(e){for(var s in e)if(e.hasOwnProperty(s)){q=e[s];l.push(q.qd);q.m()}e={}}return l};return d}();f.supportsVML=f.zc;f.attach=function(a){f.ja<10&&f.zc&&f.kb.yd(a).Ed()};f.detach=function(a){f.kb.m(a)}};
|
|
||||||
var $=element;function init(){if(doc.media!=="print"){var a=window.PIE;a&&a.attach($)}}function cleanup(){if(doc.media!=="print"){var a=window.PIE;if(a){a.detach($);$=0}}}$.readyState==="complete"&&init();
|
|
||||||
</script>
|
|
||||||
</PUBLIC:COMPONENT>
|
|
|
@ -1,44 +0,0 @@
|
||||||
---
|
|
||||||
layout: default
|
|
||||||
sectionid: blog
|
|
||||||
---
|
|
||||||
|
|
||||||
<div class="row-fluid">
|
|
||||||
<div class="span4 recent">
|
|
||||||
<h3>Recent posts</h3>
|
|
||||||
<ul class="unstyled">
|
|
||||||
{% for post in site.posts limit: 5 %}
|
|
||||||
<li{% if page.title == post.title %} class="active"{% endif %}><a href="{{ post.url }}">{{ post.title }}</a></li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="span8 simple-page">
|
|
||||||
<div class="text-item blog inner">
|
|
||||||
<h2 class="date">
|
|
||||||
<span>{{ page.title }}</span>
|
|
||||||
<span>{{ page.date | date: "%B %e, %Y" }} · {{ page.author | upcase }}</span>
|
|
||||||
</h2>
|
|
||||||
|
|
||||||
{% if page.image %}<img src="{{ page.image }}" alt="{{ page.title }}" class="text-img" />{% endif %}
|
|
||||||
|
|
||||||
{{ content }}
|
|
||||||
|
|
||||||
<div id="disqus_thread"></div>
|
|
||||||
<script type="text/javascript">
|
|
||||||
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
|
|
||||||
var disqus_shortname = 'druidio'; // required: replace example with your forum shortname
|
|
||||||
|
|
||||||
/* * * DON'T EDIT BELOW THIS LINE * * */
|
|
||||||
(function() {
|
|
||||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
|
||||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
|
||||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
|
|
||||||
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
|
@ -1,60 +0,0 @@
|
||||||
.highlight { background: #ffffff; }
|
|
||||||
.highlight .c { color: #999988; font-style: italic } /* Comment */
|
|
||||||
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
|
|
||||||
.highlight .k { font-weight: bold } /* Keyword */
|
|
||||||
.highlight .o { font-weight: bold } /* Operator */
|
|
||||||
.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */
|
|
||||||
.highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */
|
|
||||||
.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */
|
|
||||||
.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
|
|
||||||
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
|
|
||||||
.highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */
|
|
||||||
.highlight .ge { font-style: italic } /* Generic.Emph */
|
|
||||||
.highlight .gr { color: #aa0000 } /* Generic.Error */
|
|
||||||
.highlight .gh { color: #999999 } /* Generic.Heading */
|
|
||||||
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
|
|
||||||
.highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */
|
|
||||||
.highlight .go { color: #888888 } /* Generic.Output */
|
|
||||||
.highlight .gp { color: #555555 } /* Generic.Prompt */
|
|
||||||
.highlight .gs { font-weight: bold } /* Generic.Strong */
|
|
||||||
.highlight .gu { color: #aaaaaa } /* Generic.Subheading */
|
|
||||||
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
|
|
||||||
.highlight .kc { font-weight: bold } /* Keyword.Constant */
|
|
||||||
.highlight .kd { font-weight: bold } /* Keyword.Declaration */
|
|
||||||
.highlight .kp { font-weight: bold } /* Keyword.Pseudo */
|
|
||||||
.highlight .kr { font-weight: bold } /* Keyword.Reserved */
|
|
||||||
.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */
|
|
||||||
.highlight .m { color: #009999 } /* Literal.Number */
|
|
||||||
.highlight .s { color: #d14 } /* Literal.String */
|
|
||||||
.highlight .na { color: #008080 } /* Name.Attribute */
|
|
||||||
.highlight .nb { color: #0086B3 } /* Name.Builtin */
|
|
||||||
.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */
|
|
||||||
.highlight .no { color: #008080 } /* Name.Constant */
|
|
||||||
.highlight .ni { color: #800080 } /* Name.Entity */
|
|
||||||
.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */
|
|
||||||
.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */
|
|
||||||
.highlight .nn { color: #555555 } /* Name.Namespace */
|
|
||||||
.highlight .nt { color: #000080 } /* Name.Tag */
|
|
||||||
.highlight .nv { color: #008080 } /* Name.Variable */
|
|
||||||
.highlight .ow { font-weight: bold } /* Operator.Word */
|
|
||||||
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
|
|
||||||
.highlight .mf { color: #009999 } /* Literal.Number.Float */
|
|
||||||
.highlight .mh { color: #009999 } /* Literal.Number.Hex */
|
|
||||||
.highlight .mi { color: #009999 } /* Literal.Number.Integer */
|
|
||||||
.highlight .mo { color: #009999 } /* Literal.Number.Oct */
|
|
||||||
.highlight .sb { color: #d14 } /* Literal.String.Backtick */
|
|
||||||
.highlight .sc { color: #d14 } /* Literal.String.Char */
|
|
||||||
.highlight .sd { color: #d14 } /* Literal.String.Doc */
|
|
||||||
.highlight .s2 { color: #d14 } /* Literal.String.Double */
|
|
||||||
.highlight .se { color: #d14 } /* Literal.String.Escape */
|
|
||||||
.highlight .sh { color: #d14 } /* Literal.String.Heredoc */
|
|
||||||
.highlight .si { color: #d14 } /* Literal.String.Interpol */
|
|
||||||
.highlight .sx { color: #d14 } /* Literal.String.Other */
|
|
||||||
.highlight .sr { color: #009926 } /* Literal.String.Regex */
|
|
||||||
.highlight .s1 { color: #d14 } /* Literal.String.Single */
|
|
||||||
.highlight .ss { color: #990073 } /* Literal.String.Symbol */
|
|
||||||
.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */
|
|
||||||
.highlight .vc { color: #008080 } /* Name.Variable.Class */
|
|
||||||
.highlight .vg { color: #008080 } /* Name.Variable.Global */
|
|
||||||
.highlight .vi { color: #008080 } /* Name.Variable.Instance */
|
|
||||||
.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */
|
|
|
@ -0,0 +1,6 @@
.toc ul {
  list-style: none;
  list-style-position: inside;
  padding-left: 15px;
}
@ -0,0 +1,65 @@
---
---

<link rel="stylesheet" href="css/toc.css">

h1. Contents
* "Introduction":./Home.html
* "Download":./Download.html
* "Support":./Support.html
* "Contribute":./Contribute.html

h2. Getting Started
* "Tutorial: A First Look at Druid":./Tutorial:-A-First-Look-at-Druid.html
* "Tutorial: The Druid Cluster":./Tutorial:-The-Druid-Cluster.html
* "Loading Your Data":./Loading-Your-Data.html
* "Querying Your Data":./Querying-your-data.html
* "Booting a Production Cluster":./Booting-a-production-cluster.html
* "Examples":./Examples.html
* "Configuration":Configuration.html

h2. Data Ingestion
* "Realtime":./Realtime.html
* "Batch":./Batch-ingestion.html
* "Indexing Service":./Indexing-Service.html

h2. Querying
* "Querying":./Querying.html
** "Filters":./Filters.html
** "Aggregations":./Aggregations.html
** "Post Aggregations":./Post-aggregations.html
** "Granularities":./Granularities.html
* Query Types
** "GroupByQuery":./GroupByQuery.html
*** "OrderBy":./OrderBy.html
*** "Having":./Having.html
** "SearchQuery":./SearchQuery.html
*** "SearchQuerySpec":./SearchQuerySpec.html
** "SegmentMetadataQuery":./SegmentMetadataQuery.html
** "TimeBoundaryQuery":./TimeBoundaryQuery.html
** "TimeseriesQuery":./TimeseriesQuery.html

h2. Architecture
* "Design":./Design.html
* "Segments":./Segments.html
* Node Types
** "Compute":./Compute.html
** "Broker":./Broker.html
** "Master":./Master.html
*** "Rule Configuration":./Rule-Configuration.html
** "Realtime":./Realtime.html
*** "Firehose":./Firehose.html
*** "Plumber":./Plumber.html
* External Dependencies
** "Deep Storage":./Deep-Storage.html
** "MySQL":./MySQL.html
** "ZooKeeper":./ZooKeeper.html
* "Concepts and Terminology":./Concepts-and-Terminology.html

h2. Development
* "Versioning":./Versioning.html
* "Build From Source":./Build-from-source.html
* "Libraries":./Libraries.html

h2. Misc
* "Thanks":./Thanks.html
@ -0,0 +1,14 @@
.blog-listing {
  margin-bottom: 70px;
}

.blog-entry {
  margin-bottom: 70px;
}

.recents ul li {
  font-weight: 400;
  margin-bottom: 15px;
}
@ -0,0 +1,21 @@
.sub-text {
  margin-top: 20px;
  margin-bottom: 50px;
}

.main-marketing {
  margin-bottom: 50px;
}

.main-marketing a {
  color: #000000;
}

h2 {
  font-weight: 400;
  font-size: 30px;
}

.main-marketing img {
  margin-bottom: 40px;
}
@ -1,13 +1,7 @@
----
-layout: default
-title: Your New Jekyll Site
----
-
-<div id="home">
-<h1>Blog Posts</h1>
-<ul class="posts">
-{% for post in site.posts %}
-<li><span>{{ post.date | date_to_string }}</span> » <a href="{{ post.url }}">{{ post.title }}</a></li>
-{% endfor %}
-</ul>
-</div>
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<meta http-equiv="refresh" content="0;url=/content/Home.html" />
+</head>
+</html>
@ -200,12 +200,12 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer
       command.add("io.druid.cli.Main");
       command.add("internal");
       command.add("peon");
+      command.add(taskFile.toString());
+      command.add(statusFile.toString());
       String nodeType = task.getNodeType();
       if (nodeType != null) {
         command.add(String.format("--nodeType %s", nodeType));
       }
-      command.add(taskFile.toString());
-      command.add(statusFile.toString());

       jsonMapper.writeValue(taskFile, task);

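The hunk above only reorders how the peon command line is assembled: the task and status file paths are now appended before the optional --nodeType flag. A minimal illustrative sketch of the resulting argument order, with hypothetical file paths, not the actual ForkingTaskRunner code:

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative sketch only; taskFile, statusFile and nodeType are hypothetical inputs.
    class PeonCommandSketch
    {
      static List<String> buildCommand(String taskFile, String statusFile, String nodeType)
      {
        final List<String> command = new ArrayList<String>();
        command.add("io.druid.cli.Main");
        command.add("internal");
        command.add("peon");
        command.add(taskFile);    // task spec path now comes before the optional flag
        command.add(statusFile);  // status file path
        if (nodeType != null) {
          command.add(String.format("--nodeType %s", nodeType));
        }
        return command;
      }
    }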
@ -25,5 +25,6 @@ public interface Server
 {
   public String getScheme();
   public String getHost();
+  public String getAddress();
   public int getPort();
 }
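The Server contract now carries both a bare address and a host string. A minimal sketch of an implementation under the convention this commit adopts (getHost() returns "address:port", see the ServerDiscoverySelector hunk below); SimpleServer is a made-up name and this is not Druid's actual implementation:

    // Illustrative only; assumes the Server interface shown above.
    public class SimpleServer implements Server
    {
      private final String scheme;
      private final String address;
      private final int port;

      public SimpleServer(String scheme, String address, int port)
      {
        this.scheme = scheme;
        this.address = address;
        this.port = port;
      }

      @Override
      public String getScheme() { return scheme; }

      @Override
      public String getAddress() { return address; }

      @Override
      public int getPort() { return port; }

      @Override
      public String getHost()
      {
        // host includes the port, matching the convention used elsewhere in this commit
        return String.format("%s:%d", address, port);
      }
    }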
@ -62,7 +62,7 @@ public class CuratorServiceAnnouncer implements ServiceAnnouncer
     try {
       instance = ServiceInstance.<Void>builder()
                                 .name(serviceName)
-                                .address(service.getHost())
+                                .address(service.getHostNoPort())
                                 .port(service.getPort())
                                 .build();
     }
@ -147,9 +147,9 @@ public class DiscoveryModule implements Module
       final Injector injector,
       final Set<KeyHolder<DruidNode>> nodesToAnnounce,
       final Lifecycle lifecycle
-  )
+  ) throws Exception
   {
-    lifecycle.addHandler(
+    lifecycle.addMaybeStartHandler(
         new Lifecycle.Handler()
         {
           private volatile List<DruidNode> nodes = null;
@ -203,7 +203,7 @@ public class DiscoveryModule implements Module
         .client(curator)
         .build();

-    lifecycle.addHandler(
+    lifecycle.addMaybeStartHandler(
         new Lifecycle.Handler()
         {
           @Override
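Both hunks swap addHandler for addMaybeStartHandler, and the enclosing provider now declares throws Exception. A hedged sketch of the registration shape, assuming the com.metamx.common.lifecycle API where Handler exposes start() (which may throw) and stop(), and where addMaybeStartHandler starts the handler immediately if the lifecycle is already running:

    import com.metamx.common.lifecycle.Lifecycle;

    // Hedged sketch, not the project's code; the start/stop bodies are placeholders.
    class AnnouncerRegistrationSketch
    {
      static void register(Lifecycle lifecycle) throws Exception
      {
        lifecycle.addMaybeStartHandler(
            new Lifecycle.Handler()
            {
              @Override
              public void start() throws Exception
              {
                // announce the configured nodes once the lifecycle starts
              }

              @Override
              public void stop()
              {
                // unannounce on shutdown
              }
            }
        );
      }
    }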
@ -63,6 +63,12 @@ public class ServerDiscoverySelector implements DiscoverySelector<Server>
     {
       @Override
       public String getHost()
+      {
+        return String.format("%s:%d", getAddress(), getPort());
+      }
+
+      @Override
+      public String getAddress()
       {
         return instance.getAddress();
       }
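With getHost() now returning "address:port" and getAddress() exposing the bare address, callers that build URIs no longer need to append the port themselves. A small illustrative helper (not from the source) written against the Server interface shown earlier in this commit:

    // Illustrative only; the example values in the comment are made up.
    final class ServerUris
    {
      static String baseUriOf(Server server)
      {
        // e.g. scheme "http", address "10.1.2.3", port 8080 -> "http://10.1.2.3:8080"
        return String.format("%s://%s", server.getScheme(), server.getHost());
      }
    }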
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package io.druid.cli;
|
package io.druid.cli;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||||
|
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.inject.Binder;
|
import com.google.inject.Binder;
|
||||||
import com.google.inject.Injector;
|
import com.google.inject.Injector;
|
||||||
|
@ -28,6 +30,10 @@ import com.google.inject.TypeLiteral;
|
||||||
import com.google.inject.multibindings.MapBinder;
|
import com.google.inject.multibindings.MapBinder;
|
||||||
import com.google.inject.servlet.GuiceFilter;
|
import com.google.inject.servlet.GuiceFilter;
|
||||||
import com.metamx.common.logger.Logger;
|
import com.metamx.common.logger.Logger;
|
||||||
|
import druid.examples.flights.FlightsFirehoseFactory;
|
||||||
|
import druid.examples.rand.RandomFirehoseFactory;
|
||||||
|
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
|
||||||
|
import druid.examples.web.WebFirehoseFactory;
|
||||||
import io.airlift.command.Command;
|
import io.airlift.command.Command;
|
||||||
import io.druid.guice.IndexingServiceModuleHelper;
|
import io.druid.guice.IndexingServiceModuleHelper;
|
||||||
import io.druid.guice.JacksonConfigProvider;
|
import io.druid.guice.JacksonConfigProvider;
|
||||||
|
@ -41,6 +47,8 @@ import io.druid.guice.PolyBind;
|
||||||
import io.druid.indexing.common.actions.LocalTaskActionClientFactory;
|
import io.druid.indexing.common.actions.LocalTaskActionClientFactory;
|
||||||
import io.druid.indexing.common.actions.TaskActionClientFactory;
|
import io.druid.indexing.common.actions.TaskActionClientFactory;
|
||||||
import io.druid.indexing.common.actions.TaskActionToolbox;
|
import io.druid.indexing.common.actions.TaskActionToolbox;
|
||||||
|
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
|
||||||
|
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
|
||||||
import io.druid.indexing.common.tasklogs.SwitchingTaskLogStreamer;
|
import io.druid.indexing.common.tasklogs.SwitchingTaskLogStreamer;
|
||||||
import io.druid.indexing.common.tasklogs.TaskLogStreamer;
|
import io.druid.indexing.common.tasklogs.TaskLogStreamer;
|
||||||
import io.druid.indexing.common.tasklogs.TaskLogs;
|
import io.druid.indexing.common.tasklogs.TaskLogs;
|
||||||
|
@ -69,6 +77,12 @@ import io.druid.indexing.coordinator.scaling.ResourceManagementStrategy;
|
||||||
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementConfig;
|
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementConfig;
|
||||||
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementStrategy;
|
import io.druid.indexing.coordinator.scaling.SimpleResourceManagementStrategy;
|
||||||
import io.druid.indexing.coordinator.setup.WorkerSetupData;
|
import io.druid.indexing.coordinator.setup.WorkerSetupData;
|
||||||
|
import io.druid.initialization.DruidModule;
|
||||||
|
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
||||||
import io.druid.server.http.RedirectFilter;
|
import io.druid.server.http.RedirectFilter;
|
||||||
import io.druid.server.http.RedirectInfo;
|
import io.druid.server.http.RedirectInfo;
|
||||||
import io.druid.server.initialization.JettyServerInitializer;
|
import io.druid.server.initialization.JettyServerInitializer;
|
||||||
|
@ -84,6 +98,7 @@ import org.eclipse.jetty.servlet.ServletHolder;
|
||||||
import org.eclipse.jetty.servlets.GzipFilter;
|
import org.eclipse.jetty.servlets.GzipFilter;
|
||||||
import org.eclipse.jetty.util.resource.ResourceCollection;
|
import org.eclipse.jetty.util.resource.ResourceCollection;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -105,7 +120,7 @@ public class CliOverlord extends ServerRunnable
|
||||||
protected List<Object> getModules()
|
protected List<Object> getModules()
|
||||||
{
|
{
|
||||||
return ImmutableList.<Object>of(
|
return ImmutableList.<Object>of(
|
||||||
new Module()
|
new DruidModule()
|
||||||
{
|
{
|
||||||
@Override
|
@Override
|
||||||
public void configure(Binder binder)
|
public void configure(Binder binder)
|
||||||
|
@ -199,6 +214,27 @@ public class CliOverlord extends ServerRunnable
|
||||||
|
|
||||||
JsonConfigProvider.bind(binder, "druid.indexer.autoscale", SimpleResourceManagementConfig.class);
|
JsonConfigProvider.bind(binder, "druid.indexer.autoscale", SimpleResourceManagementConfig.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<? extends com.fasterxml.jackson.databind.Module> getJacksonModules()
|
||||||
|
{
|
||||||
|
return Arrays.<com.fasterxml.jackson.databind.Module>asList(
|
||||||
|
new SimpleModule("RealtimeModule")
|
||||||
|
.registerSubtypes(
|
||||||
|
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
|
||||||
|
new NamedType(FlightsFirehoseFactory.class, "flights"),
|
||||||
|
new NamedType(RandomFirehoseFactory.class, "rand"),
|
||||||
|
new NamedType(WebFirehoseFactory.class, "webstream"),
|
||||||
|
new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
|
||||||
|
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
|
||||||
|
new NamedType(ClippedFirehoseFactory.class, "clipped"),
|
||||||
|
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
|
||||||
|
new NamedType(IrcFirehoseFactory.class, "irc"),
|
||||||
|
new NamedType(StaticS3FirehoseFactory.class, "s3"),
|
||||||
|
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package io.druid.cli;
|
package io.druid.cli;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||||
|
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||||
import com.google.common.base.Throwables;
|
import com.google.common.base.Throwables;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.inject.Binder;
|
import com.google.inject.Binder;
|
||||||
|
@ -28,6 +30,10 @@ import com.google.inject.Module;
|
||||||
import com.google.inject.multibindings.MapBinder;
|
import com.google.inject.multibindings.MapBinder;
|
||||||
import com.metamx.common.lifecycle.Lifecycle;
|
import com.metamx.common.lifecycle.Lifecycle;
|
||||||
import com.metamx.common.logger.Logger;
|
import com.metamx.common.logger.Logger;
|
||||||
|
import druid.examples.flights.FlightsFirehoseFactory;
|
||||||
|
import druid.examples.rand.RandomFirehoseFactory;
|
||||||
|
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
|
||||||
|
import druid.examples.web.WebFirehoseFactory;
|
||||||
import io.airlift.command.Arguments;
|
import io.airlift.command.Arguments;
|
||||||
import io.airlift.command.Command;
|
import io.airlift.command.Command;
|
||||||
import io.airlift.command.Option;
|
import io.airlift.command.Option;
|
||||||
|
@ -45,18 +51,26 @@ import io.druid.indexing.common.actions.RemoteTaskActionClientFactory;
|
||||||
import io.druid.indexing.common.actions.TaskActionClientFactory;
|
import io.druid.indexing.common.actions.TaskActionClientFactory;
|
||||||
import io.druid.indexing.common.config.TaskConfig;
|
import io.druid.indexing.common.config.TaskConfig;
|
||||||
import io.druid.indexing.common.index.ChatHandlerProvider;
|
import io.druid.indexing.common.index.ChatHandlerProvider;
|
||||||
|
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
|
||||||
import io.druid.indexing.common.index.EventReceivingChatHandlerProvider;
|
import io.druid.indexing.common.index.EventReceivingChatHandlerProvider;
|
||||||
import io.druid.indexing.common.index.NoopChatHandlerProvider;
|
import io.druid.indexing.common.index.NoopChatHandlerProvider;
|
||||||
|
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
|
||||||
import io.druid.indexing.coordinator.TaskRunner;
|
import io.druid.indexing.coordinator.TaskRunner;
|
||||||
import io.druid.indexing.coordinator.ThreadPoolTaskRunner;
|
import io.druid.indexing.coordinator.ThreadPoolTaskRunner;
|
||||||
import io.druid.indexing.worker.executor.ChatHandlerResource;
|
import io.druid.indexing.worker.executor.ChatHandlerResource;
|
||||||
import io.druid.indexing.worker.executor.ExecutorLifecycle;
|
import io.druid.indexing.worker.executor.ExecutorLifecycle;
|
||||||
import io.druid.indexing.worker.executor.ExecutorLifecycleConfig;
|
import io.druid.indexing.worker.executor.ExecutorLifecycleConfig;
|
||||||
|
import io.druid.initialization.DruidModule;
|
||||||
import io.druid.query.QuerySegmentWalker;
|
import io.druid.query.QuerySegmentWalker;
|
||||||
import io.druid.segment.loading.DataSegmentKiller;
|
import io.druid.segment.loading.DataSegmentKiller;
|
||||||
import io.druid.segment.loading.S3DataSegmentKiller;
|
import io.druid.segment.loading.S3DataSegmentKiller;
|
||||||
import io.druid.segment.loading.SegmentLoaderConfig;
|
import io.druid.segment.loading.SegmentLoaderConfig;
|
||||||
import io.druid.segment.loading.StorageLocationConfig;
|
import io.druid.segment.loading.StorageLocationConfig;
|
||||||
|
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
||||||
import io.druid.server.initialization.JettyServerInitializer;
|
import io.druid.server.initialization.JettyServerInitializer;
|
||||||
import org.eclipse.jetty.server.Server;
|
import org.eclipse.jetty.server.Server;
|
||||||
|
|
||||||
|
@ -90,7 +104,7 @@ public class CliPeon extends GuiceRunnable
|
||||||
protected List<Object> getModules()
|
protected List<Object> getModules()
|
||||||
{
|
{
|
||||||
return ImmutableList.<Object>of(
|
return ImmutableList.<Object>of(
|
||||||
new Module()
|
new DruidModule()
|
||||||
{
|
{
|
||||||
@Override
|
@Override
|
||||||
public void configure(Binder binder)
|
public void configure(Binder binder)
|
||||||
|
@ -143,6 +157,27 @@ public class CliPeon extends GuiceRunnable
|
||||||
|
|
||||||
LifecycleModule.register(binder, Server.class);
|
LifecycleModule.register(binder, Server.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<? extends com.fasterxml.jackson.databind.Module> getJacksonModules()
|
||||||
|
{
|
||||||
|
return Arrays.<com.fasterxml.jackson.databind.Module>asList(
|
||||||
|
new SimpleModule("RealtimeModule")
|
||||||
|
.registerSubtypes(
|
||||||
|
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
|
||||||
|
new NamedType(FlightsFirehoseFactory.class, "flights"),
|
||||||
|
new NamedType(RandomFirehoseFactory.class, "rand"),
|
||||||
|
new NamedType(WebFirehoseFactory.class, "webstream"),
|
||||||
|
new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
|
||||||
|
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
|
||||||
|
new NamedType(ClippedFirehoseFactory.class, "clipped"),
|
||||||
|
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
|
||||||
|
new NamedType(IrcFirehoseFactory.class, "irc"),
|
||||||
|
new NamedType(StaticS3FirehoseFactory.class, "s3"),
|
||||||
|
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,9 +35,16 @@ import io.druid.client.InventoryView;
|
||||||
import io.druid.client.ServerView;
|
import io.druid.client.ServerView;
|
||||||
import io.druid.guice.NoopSegmentPublisherProvider;
|
import io.druid.guice.NoopSegmentPublisherProvider;
|
||||||
import io.druid.guice.RealtimeModule;
|
import io.druid.guice.RealtimeModule;
|
||||||
|
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
|
||||||
|
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
|
||||||
import io.druid.initialization.DruidModule;
|
import io.druid.initialization.DruidModule;
|
||||||
import io.druid.segment.loading.DataSegmentPusher;
|
import io.druid.segment.loading.DataSegmentPusher;
|
||||||
import io.druid.segment.realtime.SegmentPublisher;
|
import io.druid.segment.realtime.SegmentPublisher;
|
||||||
|
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
||||||
import io.druid.server.coordination.DataSegmentAnnouncer;
|
import io.druid.server.coordination.DataSegmentAnnouncer;
|
||||||
import io.druid.timeline.DataSegment;
|
import io.druid.timeline.DataSegment;
|
||||||
|
|
||||||
|
@ -88,7 +95,14 @@ public class CliRealtimeExample extends ServerRunnable
|
||||||
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
|
new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
|
||||||
new NamedType(FlightsFirehoseFactory.class, "flights"),
|
new NamedType(FlightsFirehoseFactory.class, "flights"),
|
||||||
new NamedType(RandomFirehoseFactory.class, "rand"),
|
new NamedType(RandomFirehoseFactory.class, "rand"),
|
||||||
new NamedType(WebFirehoseFactory.class, "webstream")
|
new NamedType(WebFirehoseFactory.class, "webstream"),
|
||||||
|
new NamedType(KafkaFirehoseFactory.class, "kafka"),
|
||||||
|
new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
|
||||||
|
new NamedType(ClippedFirehoseFactory.class, "clipped"),
|
||||||
|
new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
|
||||||
|
new NamedType(IrcFirehoseFactory.class, "irc"),
|
||||||
|
new NamedType(StaticS3FirehoseFactory.class, "s3"),
|
||||||
|
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,6 +85,8 @@ public class ConvertProperties implements Runnable
         new Rename("druid.indexer.terminateResources.duration", "druid.indexer.autoscale.terminatePeriod"),
         new Rename("druid.indexer.terminateResources.originDateTime", "druid.indexer.autoscale.originTime"),
         new Rename("druid.indexer.autoscaling.strategy", "druid.indexer.autoscale.strategy"),
+        new Rename("druid.indexer.logs.s3bucket", "druid.indexer.logs.s3Bucket"),
+        new Rename("druid.indexer.logs.s3prefix", "druid.indexer.logs.s3Prefix"),
         new Rename("druid.indexer.maxWorkerIdleTimeMillisBeforeDeletion", "druid.indexer.autoscale.workerIdleTimeout"),
         new Rename("druid.indexer.maxScalingDuration", "druid.indexer.autoscale.scalingTimeout"),
         new Rename("druid.indexer.numEventsToTrack", "druid.indexer.autoscale.numEventsToTrack"),
@ -122,7 +124,7 @@ public class ConvertProperties implements Runnable
         }

         File outFile = new File(outFilename);
-        if (!outFile.getParentFile().exists()) {
+        if (outFile.getParentFile() != null && !outFile.getParentFile().exists()) {
           outFile.getParentFile().mkdirs();
         }

@ -144,8 +146,10 @@ public class ConvertProperties implements Runnable
         for (PropertyConverter converter : converters) {
           if (converter.canHandle(property)) {
             for (Map.Entry<String, String> entry : converter.convert(fromFile).entrySet()) {
-              ++count;
-              updatedProps.setProperty(entry.getKey(), entry.getValue());
+              if (entry.getValue() != null) {
+                ++count;
+                updatedProps.setProperty(entry.getKey(), entry.getValue());
+              }
             }
             handled = true;
           }
@ -49,6 +49,11 @@ public class Rename implements PropertyConverter
   @Override
   public Map<String, String> convert(Properties properties)
   {
-    return ImmutableMap.of(newProperty, properties.getProperty(property));
+    final String value = properties.getProperty(property);
+    if (value != null) {
+      return ImmutableMap.of(newProperty, value);
+    } else {
+      return ImmutableMap.of();
+    }
   }
 }
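The null check matters because Guava's ImmutableMap.of rejects null values, so renaming a property that is absent from the input used to fail. A small illustrative sketch of the fixed behaviour; the property names come from the rename table above and the bucket value is a placeholder:

    import java.util.Properties;

    // Illustrative only; assumes Rename is constructible as shown in ConvertProperties.
    class RenameSketch
    {
      public static void main(String[] args)
      {
        final Rename rename = new Rename("druid.indexer.logs.s3bucket", "druid.indexer.logs.s3Bucket");

        final Properties present = new Properties();
        present.setProperty("druid.indexer.logs.s3bucket", "example-bucket");
        System.out.println(rename.convert(present));          // {druid.indexer.logs.s3Bucket=example-bucket}

        // Previously ImmutableMap.of(newProperty, null) threw a NullPointerException here.
        System.out.println(rename.convert(new Properties())); // {}
      }
    }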
@ -26,7 +26,13 @@ import com.google.inject.Binder;
|
||||||
import com.google.inject.Key;
|
import com.google.inject.Key;
|
||||||
import com.google.inject.TypeLiteral;
|
import com.google.inject.TypeLiteral;
|
||||||
import com.google.inject.multibindings.MapBinder;
|
import com.google.inject.multibindings.MapBinder;
|
||||||
|
import druid.examples.flights.FlightsFirehoseFactory;
|
||||||
|
import druid.examples.rand.RandomFirehoseFactory;
|
||||||
|
import druid.examples.twitter.TwitterSpritzerFirehoseFactory;
|
||||||
|
import druid.examples.web.WebFirehoseFactory;
|
||||||
import io.druid.cli.QueryJettyServerInitializer;
|
import io.druid.cli.QueryJettyServerInitializer;
|
||||||
|
import io.druid.indexing.common.index.EventReceiverFirehoseFactory;
|
||||||
|
import io.druid.indexing.common.index.StaticS3FirehoseFactory;
|
||||||
import io.druid.initialization.DruidModule;
|
import io.druid.initialization.DruidModule;
|
||||||
import io.druid.query.QuerySegmentWalker;
|
import io.druid.query.QuerySegmentWalker;
|
||||||
import io.druid.segment.realtime.DbSegmentPublisher;
|
import io.druid.segment.realtime.DbSegmentPublisher;
|
||||||
|
@ -34,7 +40,11 @@ import io.druid.segment.realtime.FireDepartment;
|
||||||
import io.druid.segment.realtime.NoopSegmentPublisher;
|
import io.druid.segment.realtime.NoopSegmentPublisher;
|
||||||
import io.druid.segment.realtime.RealtimeManager;
|
import io.druid.segment.realtime.RealtimeManager;
|
||||||
import io.druid.segment.realtime.SegmentPublisher;
|
import io.druid.segment.realtime.SegmentPublisher;
|
||||||
|
import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
|
||||||
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
|
import io.druid.segment.realtime.firehose.KafkaFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.RabbitMQFirehoseFactory;
|
||||||
|
import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
|
||||||
import io.druid.server.initialization.JettyServerInitializer;
|
import io.druid.server.initialization.JettyServerInitializer;
|
||||||
import org.eclipse.jetty.server.Server;
|
import org.eclipse.jetty.server.Server;
|
||||||
|
|
||||||
|
@ -80,7 +90,17 @@ public class RealtimeModule implements DruidModule
     return Arrays.<Module>asList(
         new SimpleModule("RealtimeModule")
             .registerSubtypes(
-                new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2")
+                new NamedType(TwitterSpritzerFirehoseFactory.class, "twitzer"),
+                new NamedType(FlightsFirehoseFactory.class, "flights"),
+                new NamedType(RandomFirehoseFactory.class, "rand"),
+                new NamedType(WebFirehoseFactory.class, "webstream"),
+                new NamedType(KafkaFirehoseFactory.class, "kafka-0.7.2"),
+                new NamedType(RabbitMQFirehoseFactory.class, "rabbitmq"),
+                new NamedType(ClippedFirehoseFactory.class, "clipped"),
+                new NamedType(TimedShutoffFirehoseFactory.class, "timed"),
+                new NamedType(IrcFirehoseFactory.class, "irc"),
+                new NamedType(StaticS3FirehoseFactory.class, "s3"),
+                new NamedType(EventReceiverFirehoseFactory.class, "receiver")
             )
     );
 }
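These NamedType registrations are what let a spec file pick a firehose implementation by its "type" name. A hedged, self-contained sketch of the same Jackson mechanism with made-up classes (Firehose and KafkaFirehose are stand-ins, not Druid's FirehoseFactory types):

    import com.fasterxml.jackson.annotation.JsonTypeInfo;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.jsontype.NamedType;
    import com.fasterxml.jackson.databind.module.SimpleModule;

    // The "type" property selects the registered subtype by name.
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
    interface Firehose {}

    class KafkaFirehose implements Firehose {}

    class SubtypeSketch
    {
      public static void main(String[] args) throws Exception
      {
        final ObjectMapper mapper = new ObjectMapper();
        mapper.registerModule(
            new SimpleModule("SketchModule")
                .registerSubtypes(new NamedType(KafkaFirehose.class, "kafka-0.7.2"))
        );
        final Firehose firehose = mapper.readValue("{\"type\": \"kafka-0.7.2\"}", Firehose.class);
        System.out.println(firehose.getClass().getSimpleName()); // KafkaFirehose
      }
    }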