mirror of https://github.com/apache/druid.git
Merge branch 'master' into druid-0.7.x
This commit is contained in:
commit
1cfabc127f
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -1,48 +1,10 @@
|
|||
|
||||
<!-- Start page_footer include -->
|
||||
<div class="container">
|
||||
<footer>
|
||||
<div class="container">
|
||||
<hr>
|
||||
<div class="row">
|
||||
<div class="col-md-4">
|
||||
<address>
|
||||
<strong>CONTACT US</strong>
|
||||
<a href="mailto:info@druid.io">info@druid.io</a>
|
||||
</address>
|
||||
<address>
|
||||
<div class="soc">
|
||||
<a href="https://twitter.com/druidio"></a>
|
||||
<a href="https://github.com/metamx/druid" class="github"></a>
|
||||
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
|
||||
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
|
||||
</div>
|
||||
</div>
|
||||
<ul class="col-md-4 list-unstyled">
|
||||
<li><a href="/"><strong>DRUID</strong></a></li>
|
||||
<li><a href="/druid.html">What is Druid?</a></li>
|
||||
<li><a href="/downloads.html">Downloads</a></li>
|
||||
<li><a target="_blank" href="Home.html">Documentation</a></li>
|
||||
</ul>
|
||||
<ul class="col-md-4 list-unstyled">
|
||||
<li><a href="/community.html"><strong>SUPPORT</strong></a></li>
|
||||
<li><a href="/community.html">Community</a></li>
|
||||
<li><a href="/faq.html">FAQ</a></li>
|
||||
<li><a href="/licensing.html">Licensing</a></li>
|
||||
<li><a href="/blog"><strong>BLOG</strong></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</footer>
|
||||
</div>
|
||||
<script type="text/javascript">
|
||||
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
||||
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
||||
</script>
|
||||
<script type="text/javascript">
|
||||
try {
|
||||
var pageTracker = _gat._getTracker("UA-40280432-1");
|
||||
pageTracker._trackPageview();
|
||||
} catch(err) {}
|
||||
<script src="http://code.jquery.com/jquery.min.js"></script>
|
||||
</script>
|
||||
<!-- stop page_footer include -->
|
||||
|
|
|
@ -1,25 +1,16 @@
|
|||
|
||||
<!-- Start page_header include -->
|
||||
<div class="navbar navbar-inverse navbar-static-top">
|
||||
<div class="container druid-navbar">
|
||||
<div class="navbar navbar-inverse navbar-static-top druid-nav">
|
||||
<div class="container">
|
||||
<div class="navbar-header">
|
||||
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
</button>
|
||||
<a class="navbar-brand" href="/">Druid</a>
|
||||
</div>
|
||||
<div class="navbar-collapse collapse">
|
||||
<ul class="nav navbar-nav">
|
||||
<li {% if page.id == 'home' %} class="active"{% endif %}><a href="/">Home</a></li>
|
||||
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}><a href="/druid.html">What is Druid?</a></li>
|
||||
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}><a href="/downloads.html">Downloads</a></li>
|
||||
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
|
||||
<li {% if page.sectionid == 'community' %} class="active"{% endif %}><a href="/community.html">Community</a></li>
|
||||
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}><a href="/faq.html">FAQ</a></li>
|
||||
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}><a href="/blog">Blog</a></li>
|
||||
<li class="divider"></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -5,15 +5,13 @@
|
|||
|
||||
<title>Druid | {{page.title}}</title>
|
||||
|
||||
<!-- Latest compiled and minified CSS -->
|
||||
<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.css">
|
||||
<link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css">
|
||||
|
||||
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
|
||||
<link href='http://fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600' rel='stylesheet' type='text/css'>
|
||||
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
|
||||
|
||||
<link rel="stylesheet" href="//druid.io/css/main.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/header.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/footer.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/syntax.css">
|
||||
|
||||
|
||||
<link rel="stylesheet" href="//druid.io/css/docs.css">
|
||||
|
|
|
@ -2,35 +2,30 @@
|
|||
<html lang="en">
|
||||
<head>
|
||||
{% include site_head.html %}
|
||||
|
||||
<link rel="stylesheet" href="css/docs.css">
|
||||
</head>
|
||||
<body>
|
||||
{% include page_header.html %}
|
||||
|
||||
<div class="container">
|
||||
<div class="page-header">
|
||||
<h1>Documentation</h1>
|
||||
<div class="druid-header">
|
||||
<div class="container">
|
||||
<h1>Documentation</h1>
|
||||
<h4></h4>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-3 toc" id="toc">
|
||||
</div>
|
||||
|
||||
<div class="col-md-9 doc-content">
|
||||
{{ content }}
|
||||
</div>
|
||||
<div class="col-md-3 toc" id="toc">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% include page_footer.html %}
|
||||
<script src="http://code.jquery.com/jquery.js"></script>
|
||||
<script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#toc").load("toc.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<script>$(function() { $(".toc").load("toc.html"); });</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
### ApproxHistogram aggregator
|
||||
|
||||
This aggregator is based on [http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf) to compute approximate histograms.
|
||||
|
|
|
@ -12,7 +12,7 @@ You can provision individual servers, loading Druid onto each machine (or buildi
|
|||
|
||||
[Apache Whirr](http://whirr.apache.org/) is a set of libraries for launching cloud services. For Druid, Whirr serves as an easy way to launch a cluster in Amazon AWS by using simple commands and configuration files (called *recipes*).
|
||||
|
||||
**NOTE:** Whirr will install Druid 0.6.136. Also, it doesn't work with JDK1.7.0_55. JDK1.7.0_45 recommended.
|
||||
**NOTE:** Whirr will install Druid 0.6.137. Also, it doesn't work with JDK1.7.0_55. JDK1.7.0_45 recommended.
|
||||
|
||||
You'll need an AWS account, S3 Bucket and an EC2 key pair from that account so that Whirr can connect to the cloud via the EC2 API. If you haven't generated a key pair, see the [AWS documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) or see this [Whirr FAQ](http://whirr.apache.org/faq.html#how-do-i-find-my-cloud-credentials).
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ layout: doc_page
|
|||
Data Formats for Ingestion
|
||||
==========================
|
||||
|
||||
Druid can ingest data in JSON, CSV, or TSV. While most examples in the documentation use data in JSON format, it is not difficult to configure Druid to ingest CSV or TSV data.
|
||||
Druid can ingest data in JSON, CSV, or custom delimited data such as TSV. While most examples in the documentation use data in JSON format, it is not difficult to configure Druid to ingest CSV or other delimited data.
|
||||
|
||||
## Formatting the Data
|
||||
The following are three samples of the data used in the [Wikipedia example](Tutorial:-Loading-Your-Data-Part-1.html).
|
||||
|
@ -41,8 +41,8 @@ _TSV_
|
|||
|
||||
Note that the CSV and TSV data do not contain column heads. This becomes important when you specify the data for ingesting.
|
||||
|
||||
## Configuring Ingestion For the Indexing Service
|
||||
If you use the [indexing service](Indexing-Service.html) for ingesting the data, a [task](Tasks.html) must be configured and submitted. Tasks are configured with a JSON object which, among other things, specifies the data source and type. In the Wikipedia example, JSON data was read from a local file. The task spec contains a firehose element to specify this:
|
||||
## Configuration
|
||||
All forms of Druid ingestion require some form of schema object. An example blob of json pertaining to the data format may look something like this:
|
||||
|
||||
```json
|
||||
"firehose" : {
|
||||
|
|
|
@ -19,13 +19,13 @@ Clone Druid and build it:
|
|||
git clone https://github.com/metamx/druid.git druid
|
||||
cd druid
|
||||
git fetch --tags
|
||||
git checkout druid-0.6.136
|
||||
git checkout druid-0.6.137
|
||||
./build.sh
|
||||
```
|
||||
|
||||
### Downloading the DSK (Druid Standalone Kit)
|
||||
|
||||
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.136-bin.tar.gz) a stand-alone tarball and run it:
|
||||
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.137-bin.tar.gz) a stand-alone tarball and run it:
|
||||
|
||||
``` bash
|
||||
tar -xzf druid-services-0.X.X-bin.tar.gz
|
||||
|
|
|
@ -36,10 +36,40 @@ See [Examples](Examples.html). This firehose connects directly to the twitter sp
|
|||
|
||||
See [Examples](Examples.html). This firehose creates a stream of random numbers.
|
||||
|
||||
#### RabbitMqFirehouse
|
||||
#### RabbitMqFirehose
|
||||
|
||||
This firehose ingests events from a define rabbit-mq queue.
|
||||
|
||||
#### IngestSegmentFirehose
|
||||
|
||||
This Firehose can be used to read the data from existing druid segments.
|
||||
It can be used ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment.
|
||||
A sample ingest firehose spec is shown below -
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "ingestSegment",
|
||||
"dataSource" : "wikipedia",
|
||||
"interval" : "2013-01-01/2013-01-02",
|
||||
"dimensions":[],
|
||||
"metrics":[]
|
||||
}
|
||||
```
|
||||
|
||||
|property|description|required?|
|
||||
|--------|-----------|---------|
|
||||
|type|ingestSegment. Type of firehose|yes|
|
||||
|dataSource|A String defining the data source to fetch rows from, very similar to a table in a relational database|yes|
|
||||
|interval|A String representing ISO-8601 Interval. This defines the time range to fetch the data over.|yes|
|
||||
|dimensions|The list of dimensions to select. If left empty, all dimensions are selected.|no|
|
||||
|metrics|The list of metrics to select. If left empty, all metrics are returned.|no|
|
||||
|filter| See [Filters](Filters.html)|yes|
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Parsing Data
|
||||
------------
|
||||
|
||||
|
|
|
@ -37,6 +37,11 @@ You can check the coordinator console located at `<COORDINATOR_IP>:<PORT>/cluste
|
|||
|
||||
You can check `<BROKER_IP>:<PORT>/druid/v2/datasources/<YOUR_DATASOURCE>?interval=0/3000` for the dimensions and metrics that have been created for your datasource. Make sure that the name of the aggregators you use in your query match one of these metrics. Also make sure that the query interval you specify match a valid time range where data exists. Note: the broker endpoint will only return valid results on historical segments.
|
||||
|
||||
## How can I Reindex existing data in Druid with schema changes?
|
||||
|
||||
You can use IngestSegmentFirehose with index task to ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment.
|
||||
See [Firehose](Firehose.html) for more details on IngestSegmentFirehose.
|
||||
|
||||
## More information
|
||||
|
||||
Getting data into Druid can definitely be difficult for first time users. Please don't hesitate to ask questions in our IRC channel or on our [google groups page](https://groups.google.com/forum/#!forum/druid-development).
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Ingesting from Kafka 8
|
||||
----------------------
|
||||
|
||||
The previous examples are for Kafka 7. To support Kafka 8, a couple changes need to be made:
|
||||
|
||||
- Update realtime node's configs for Kafka 8 extensions
|
||||
- e.g.
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.137",...]`
|
||||
- becomes
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.137",...]`
|
||||
- Update realtime task config for changed keys
|
||||
- `firehose.type`, `plumber.rejectionPolicyFactory`, and all of `firehose.consumerProps` changes.
|
||||
|
||||
```json
|
||||
|
||||
"firehose" : {
|
||||
"type" : "kafka-0.8",
|
||||
"consumerProps" : {
|
||||
"zookeeper.connect": "localhost:2181",
|
||||
"zookeeper.connection.timeout.ms": "15000",
|
||||
"zookeeper.session.timeout.ms": "15000",
|
||||
"zookeeper.sync.time.ms": "5000",
|
||||
"group.id": "topic-pixel-local",
|
||||
"fetch.message.max.bytes": "1048586",
|
||||
"auto.offset.reset": "largest",
|
||||
"auto.commit.enable": "false"
|
||||
},
|
||||
"feed" : "druidtest",
|
||||
"parser" : {
|
||||
"timestampSpec" : {
|
||||
"column" : "utcdt",
|
||||
"format" : "iso"
|
||||
},
|
||||
"data" : {
|
||||
"format" : "json"
|
||||
},
|
||||
"dimensionExclusions" : [
|
||||
"wp"
|
||||
]
|
||||
}
|
||||
},
|
||||
"plumber" : {
|
||||
"type" : "realtime",
|
||||
"windowPeriod" : "PT10m",
|
||||
"segmentGranularity":"hour",
|
||||
"basePersistDirectory" : "/tmp/realtime/basePersist",
|
||||
"rejectionPolicyFactory": {
|
||||
"type": "messageTime"
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Working with different versions of Hadoop may require a bit of extra work for the time being. We will make changes to support different Hadoop versions in the near future. If you have problems outside of these instructions, please feel free to contact us in IRC or on the [forum](https://groups.google.com/forum/#!forum/druid-development).
|
||||
|
||||
Working with Hadoop 2.x
|
||||
-----------------------
|
||||
The default version of Hadoop bundled with Druid is 2.3. This should work out of the box.
|
||||
|
||||
To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates`. You can pass another set of Hadoop coordinates through this parameter (e.g. You can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`).
|
||||
|
||||
The Hadoop Index Task takes this parameter has part of the task JSON and the standalone Hadoop indexer takes this parameter as a command line argument.
|
||||
|
||||
|
||||
Working with Hadoop 1.x and older
|
||||
---------------------------------
|
||||
We recommend recompiling Druid with your particular version of Hadoop by changing the dependencies in Druid's pom.xml files. Make sure to also either override the default `hadoopDependencyCoordinates` in the code or pass your Hadoop version in as part of indexing.
|
|
@ -57,7 +57,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/overlord
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
@ -139,7 +139,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/middlemanager
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136","io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137","io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
@ -286,7 +286,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/historical
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
|
|
@ -108,21 +108,52 @@ Query Operators
|
|||
|
||||
The following table summarizes query properties.
|
||||
|
||||
|query types|property|description|required?|
|
||||
|-----------|--------|-----------|---------|
|
||||
|timeseries, groupBy, search, timeBoundary|dataSource|query is applied to this data source|yes|
|
||||
|timeseries, groupBy, search|intervals|range of time series to include in query|yes|
|
||||
|timeseries, groupBy, search, timeBoundary|context|This is a key-value map that can allow the query to alter some of the behavior of a query. It is primarily used for debugging, for example if you include `"bySegment":true` in the map, you will get results associated with the data segment they came from.|no|
|
||||
|timeseries, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|
||||
|timeseries, groupBy, search|granularity|the timestamp granularity to bucket results into (i.e. "hour"). See [Granularities](Granularities.html) for more information.|no|
|
||||
Properties shared by all query types
|
||||
|
||||
|property |description|required?|
|
||||
|----------|-----------|---------|
|
||||
|dataSource|query is applied to this data source|yes|
|
||||
|intervals |range of time series to include in query|yes|
|
||||
|context |This is a key-value map used to alter some of the behavior of a query. See [Query Context](#query-context) below|no|
|
||||
|
||||
|
||||
|query type|property |description|required?|
|
||||
|----------|----------|-----------|---------|
|
||||
|timeseries, topN, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|
||||
|timeseries, topN, groupBy, search|granularity|the timestamp granularity to bucket results into (i.e. "hour"). See [Granularities](Granularities.html) for more information.|no|
|
||||
|timeseries, topN, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|
||||
|timeseries, topN, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post Aggregations.html).|yes|
|
||||
|groupBy|dimensions|constrains the groupings; if empty, then one value per time granularity bucket|yes|
|
||||
|timeseries, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|
||||
|timeseries, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post Aggregations.html).|yes|
|
||||
|search|limit|maximum number of results (default is 1000), a system-level maximum can also be set via `com.metamx.query.search.maxSearchLimit`|no|
|
||||
|search|searchDimensions|Dimensions to apply the search query to. If not specified, it will search through all dimensions.|no|
|
||||
|search|query|The query portion of the search query. This is essentially a predicate that specifies if something matches.|yes|
|
||||
|
||||
Additional Information about Query Types
|
||||
----------------------------------------
|
||||
Query Context
|
||||
-------------
|
||||
|
||||
[TimeseriesQuery](TimeseriesQuery.html)
|
||||
|property |default | description |
|
||||
|--------------|---------------------|----------------------|
|
||||
|timeout | `0` (no timeout) | Query timeout in milliseconds, beyond which unfinished queries will be cancelled |
|
||||
|priority | `0` | Query Priority. Queries with higher priority get precedence for computational resources.|
|
||||
|queryId | auto-generated | Unique identifier given to this query. If a query ID is set or known, this can be used to cancel the query |
|
||||
|useCache | `true` | Flag indicating whether to leverage the query cache for this query. This may be overriden in the broker or historical node configuration |
|
||||
|populateCache | `true` | Flag indicating whether to save the results of the query to the query cache. Primarily used for debugging. This may be overriden in the broker or historical node configuration |
|
||||
|bySegment | `false` | Return "by segment" results. Pimarily used for debugging, setting it to `true` returns results associated with the data segment they came from |
|
||||
|finalize | `true` | Flag indicating whether to "finalize" aggregation results. Primarily used for debugging. For instance, the `hyperUnique` aggregator will return the full HyperLogLog sketch instead of the estimated cardinality when this flag is set to `false` |
|
||||
|
||||
Query Cancellation
|
||||
------------------
|
||||
|
||||
Queries can be cancelled explicitely using their unique identifier. If the
|
||||
query identifier is set at the time of query, or is known, the following
|
||||
endpoint can be used on the broker or router to cancel the query.
|
||||
|
||||
```sh
|
||||
DELETE /druid/v2/{queryId}
|
||||
```
|
||||
|
||||
For example, if the query ID is `abc123`, the query can be cancelled as follows:
|
||||
|
||||
```sh
|
||||
curl -X DELETE "http://host:port/druid/v2/abc123"
|
||||
```
|
||||
|
|
|
@ -27,7 +27,7 @@ druid.host=localhost
|
|||
druid.service=realtime
|
||||
druid.port=8083
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
@ -76,7 +76,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/realtime
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136","io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137","io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
|
|
@ -28,7 +28,7 @@ Configuration:
|
|||
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
|
|
|
@ -7,7 +7,8 @@ Time boundary queries return the earliest and latest data points of a data set.
|
|||
```json
|
||||
{
|
||||
"queryType" : "timeBoundary",
|
||||
"dataSource": "sample_datasource"
|
||||
"dataSource": "sample_datasource",
|
||||
"bound" : < "maxTime" | "minTime" > # optional, defaults to returning both timestamps if not set
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -17,6 +18,7 @@ There are 3 main parts to a time boundary query:
|
|||
|--------|-----------|---------|
|
||||
|queryType|This String should always be "timeBoundary"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|
||||
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|
||||
|bound | Optional, set to `maxTime` or `minTime` to return only the latest or earliest timestamp. Default to returning both if not set| no |
|
||||
|context|An additional JSON Object which can be used to specify certain flags.|no|
|
||||
|
||||
The format of the result is:
|
||||
|
|
|
@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
|||
|
||||
### Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.136-bin.tar.gz). Download this file to a directory of your choosing.
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.137-bin.tar.gz). Download this file to a directory of your choosing.
|
||||
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
||||
|
@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
|
|||
Not too lost so far right? That's great! If you cd into the directory:
|
||||
|
||||
```
|
||||
cd druid-services-0.6.136
|
||||
cd druid-services-0.6.137
|
||||
```
|
||||
|
||||
You should see a bunch of files:
|
||||
|
|
|
@ -91,7 +91,7 @@ druid.service=overlord
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
|
|
|
@ -341,60 +341,3 @@ Additional Information
|
|||
----------------------
|
||||
|
||||
Getting data into Druid can definitely be difficult for first time users. Please don't hesitate to ask questions in our IRC channel or on our [google groups page](https://groups.google.com/forum/#!forum/druid-development).
|
||||
|
||||
|
||||
Further Reading
|
||||
---------------------
|
||||
|
||||
Ingesting from Kafka 8
|
||||
---------------------------------
|
||||
|
||||
|
||||
Continuing from the Kafka 7 examples, to support Kafka 8, a couple changes need to be made:
|
||||
|
||||
- Update realtime node's configs for Kafka 8 extensions
|
||||
- e.g.
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.136",...]`
|
||||
- becomes
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.136",...]`
|
||||
- Update realtime task config for changed keys
|
||||
- `firehose.type`, `plumber.rejectionPolicyFactory`, and all of `firehose.consumerProps` changes.
|
||||
|
||||
```json
|
||||
|
||||
"firehose" : {
|
||||
"type" : "kafka-0.8",
|
||||
"consumerProps" : {
|
||||
"zookeeper.connect": "localhost:2181",
|
||||
"zookeeper.connection.timeout.ms": "15000",
|
||||
"zookeeper.session.timeout.ms": "15000",
|
||||
"zookeeper.sync.time.ms": "5000",
|
||||
"group.id": "topic-pixel-local",
|
||||
"fetch.message.max.bytes": "1048586",
|
||||
"auto.offset.reset": "largest",
|
||||
"auto.commit.enable": "false"
|
||||
},
|
||||
"feed" : "druidtest",
|
||||
"parser" : {
|
||||
"timestampSpec" : {
|
||||
"column" : "utcdt",
|
||||
"format" : "iso"
|
||||
},
|
||||
"data" : {
|
||||
"format" : "json"
|
||||
},
|
||||
"dimensionExclusions" : [
|
||||
"wp"
|
||||
]
|
||||
}
|
||||
},
|
||||
"plumber" : {
|
||||
"type" : "realtime",
|
||||
"windowPeriod" : "PT10m",
|
||||
"segmentGranularity":"hour",
|
||||
"basePersistDirectory" : "/tmp/realtime/basePersist",
|
||||
"rejectionPolicyFactory": {
|
||||
"type": "messageTime"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
|
|
@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes and external depende
|
|||
|
||||
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
|
||||
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.136-bin.tar.gz)
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.137-bin.tar.gz)
|
||||
|
||||
and untar the contents within by issuing:
|
||||
|
||||
|
@ -149,7 +149,7 @@ druid.port=8081
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137"]
|
||||
|
||||
# Dummy read only AWS account (used to download example data)
|
||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||
|
@ -240,7 +240,7 @@ druid.port=8083
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.136","io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.137","io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
# Change this config to db to hand off to the rest of the Druid cluster
|
||||
druid.publish.type=noop
|
||||
|
|
|
@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
|||
|
||||
h3. Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.136-bin.tar.gz)
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.137-bin.tar.gz)
|
||||
Download this file to a directory of your choosing.
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
||||
|
@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
|
|||
Not too lost so far right? That's great! If you cd into the directory:
|
||||
|
||||
```
|
||||
cd druid-services-0.6.136
|
||||
cd druid-services-0.6.137
|
||||
```
|
||||
|
||||
You should see a bunch of files:
|
||||
|
|
|
@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.
|
|||
|
||||
# Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.136-bin.tar.gz).
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.137-bin.tar.gz).
|
||||
Download this bad boy to a directory of your choosing.
|
||||
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
|
|
@ -26,11 +26,14 @@ h2. Configuration
|
|||
* "Historical":Historical-Config.html
|
||||
* "Broker":Broker-Config.html
|
||||
* "Realtime":Realtime-Config.html
|
||||
* "Configuring Logging":./Logging.html
|
||||
|
||||
h2. Data Ingestion
|
||||
* "Ingestion FAQ":./Ingestion-FAQ.html
|
||||
* "Realtime":./Realtime-ingestion.html
|
||||
** "Kafka-0.8.x Ingestion":./Kafka-Eight.html
|
||||
* "Batch":./Batch-ingestion.html
|
||||
** "Different Hadoop Versions":./Other-Hadoop.html
|
||||
* "Indexing Service":./Indexing-Service.html
|
||||
** "Tasks":./Tasks.html
|
||||
* "Data Formats":./Data_formats.html
|
||||
|
|
|
@ -1,21 +1,12 @@
|
|||
.sub-text {
|
||||
margin-top: 20px;
|
||||
margin-bottom: 50px;
|
||||
}
|
||||
|
||||
.main-marketing {
|
||||
margin-bottom: 50px;
|
||||
padding: 30px 0 30px 0;
|
||||
}
|
||||
|
||||
.main-marketing a {
|
||||
color: #000000;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
h2 {
|
||||
.main-marketing h2 {
|
||||
margin-top: 20px;
|
||||
font-weight: 400;
|
||||
font-size: 30px;
|
||||
}
|
||||
|
||||
.main-marketing img {
|
||||
margin-bottom: 40px;
|
||||
}
|
|
@ -4,7 +4,7 @@ druid.port=8081
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.137"]
|
||||
|
||||
# Dummy read only AWS account (used to download example data)
|
||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||
|
|
|
@ -4,7 +4,7 @@ druid.service=overlord
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.137"]
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
|
|
|
@ -4,7 +4,7 @@ druid.port=8083
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.136","io.druid.extensions:druid-kafka-seven:0.6.136","io.druid.extensions:druid-rabbitmq:0.6.136"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.137","io.druid.extensions:druid-kafka-seven:0.6.137","io.druid.extensions:druid-rabbitmq:0.6.137"]
|
||||
|
||||
# Change this config to db to hand off to the rest of the Druid cluster
|
||||
druid.publish.type=noop
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -23,7 +23,7 @@
|
|||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
<name>druid</name>
|
||||
<description>druid</description>
|
||||
<scm>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.137-SNAPSHOT</version>
|
||||
<version>0.6.138-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
Loading…
Reference in New Issue