mirror of https://github.com/apache/druid.git
Merge branch 'master' into new-init
Conflicts: examples/config/historical/runtime.properties examples/config/overlord/runtime.properties examples/config/realtime/runtime.properties processing/src/main/java/io/druid/guice/PropertiesModule.java server/src/main/java/io/druid/initialization/Initialization.java
This commit is contained in:
commit
7624bb5ea3
Binary file not shown.
7
README
7
README
|
@ -1,7 +0,0 @@
|
|||
The best place for more Druid resources is at: http://www.druid.io
|
||||
|
||||
Looking for docs? http://druid.io/docs/latest/
|
||||
|
||||
Build with build.sh
|
||||
|
||||
Want to get started? http://druid.io/docs/latest/Tutorial:-A-First-Look-at-Druid.html
|
|
@ -0,0 +1,20 @@
|
|||
## Druid
|
||||
|
||||
Druid is a distributed, column-oriented, real-time analytics data store that is
|
||||
commonly used to power exploratory dashboards in multi-tenant environments.
|
||||
Druid excels as a data warehousing solution for fast aggregate queries on
|
||||
petabyte sized data sets. Druid supports a variety of flexible filters, exact
|
||||
calculations, approximate algorithms, and other useful calculations. Druid can
|
||||
load both streaming and batch data and integrates with Storm and Hadoop.
|
||||
|
||||
### More Information
|
||||
Much more information about Druid can be found on our [website](http://www.druid.io).
|
||||
|
||||
### Documentation
|
||||
We host documentation on our [website](http://druid.io/docs/latest/). If you want to contribute documentation changes, please submit a pull request to this repository.
|
||||
|
||||
### Tutorials
|
||||
We have a series of tutorials to get started with Druid, starting with this [one](http://druid.io/docs/latest/Tutorial:-A-First-Look-at-Druid.html).
|
||||
|
||||
### Support
|
||||
Contact us through our [forum](https://groups.google.com/forum/#!forum/druid-development) or on IRC in #druid-dev on irc.freenode.net.
|
6
build.sh
6
build.sh
|
@ -25,9 +25,3 @@ echo " "
|
|||
echo " The following self-contained jars (and more) have been built:"
|
||||
echo " "
|
||||
find . -name '*-selfcontained.jar'
|
||||
echo " "
|
||||
echo "For examples, see: "
|
||||
echo " "
|
||||
ls -1 examples/*/*sh
|
||||
echo " "
|
||||
echo "See also http://druid.io/docs/latest"
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.google.common.base.Throwables;
|
|||
import com.google.common.collect.Ordering;
|
||||
import com.google.common.io.Closeables;
|
||||
import com.metamx.common.guava.Accumulator;
|
||||
import com.metamx.common.guava.CloseQuietly;
|
||||
import com.metamx.common.guava.Sequence;
|
||||
import com.metamx.common.guava.Yielder;
|
||||
import com.metamx.common.guava.Yielders;
|
||||
|
@ -70,7 +71,7 @@ public class OrderedMergeSequence<T> implements Sequence<T>
|
|||
return yielder.get();
|
||||
}
|
||||
finally {
|
||||
Closeables.closeQuietly(yielder);
|
||||
CloseQuietly.close(yielder);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,14 +46,9 @@ public class SerializerUtils
|
|||
|
||||
public void writeString(OutputSupplier<? extends OutputStream> supplier, String name) throws IOException
|
||||
{
|
||||
OutputStream out = null;
|
||||
try {
|
||||
out = supplier.getOutput();
|
||||
try (OutputStream out = supplier.getOutput()) {
|
||||
writeString(out, name);
|
||||
}
|
||||
finally {
|
||||
Closeables.closeQuietly(out);
|
||||
}
|
||||
}
|
||||
|
||||
public void writeString(WritableByteChannel out, String name) throws IOException
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.server;
|
||||
package io.druid.common.utils;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
|
@ -1,43 +1,55 @@
|
|||
#! /bin/bash -e
|
||||
SCRIPT_DIR=$(cd $(dirname "$0") && pwd)
|
||||
|
||||
if [ -z ${1} ]; then
|
||||
VERSION=$(cat $SCRIPT_DIR/../../pom.xml | grep version | head -4 | tail -1 | sed 's_.*<version>\([^<]*\)</version>.*_\1_')
|
||||
if [ -z "$1" ]; then
|
||||
version="latest"
|
||||
else
|
||||
VERSION=${1}
|
||||
version=$1
|
||||
fi
|
||||
|
||||
#if [ -z "$(git tag -l "druid-$VERSION")" ]
|
||||
if [ -z "$(git tag -l "druid-$VERSION")" ] && [ "$VERSION" != "latest" ]; then
|
||||
echo "Version tag does not exist: druid-$VERSION"
|
||||
docs=$(git -C "$(dirname "$0")" rev-parse --show-toplevel)/docs
|
||||
|
||||
if [ -n "$(git -C "$docs" status --porcelain --untracked-files=no content)" ]; then
|
||||
echo "Docs directory is not clean, aborting"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$(git tag -l "druid-$version")" ] && [ "$version" != "latest" ]; then
|
||||
echo "Version tag does not exist: druid-$version"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
WORKING_DIR=$(mktemp -d -t druid-docs-deploy)
|
||||
tmp=$(mktemp -d -t druid-docs-deploy)
|
||||
|
||||
echo Using Version [${VERSION}]
|
||||
echo Script in [${SCRIPT_DIR}]
|
||||
echo Deploying to [${WORKING_DIR}]
|
||||
echo "Using Version [$version]"
|
||||
echo "Working directory [$tmp]"
|
||||
|
||||
if [ -d ${WORKING_DIR} ]; then
|
||||
echo DELETING ${WORKING_DIR}
|
||||
rm -rf ${WORKING_DIR}
|
||||
git clone git@github.com:druid-io/druid-io.github.io.git "$tmp"
|
||||
|
||||
target=$tmp/docs/$version
|
||||
|
||||
mkdir -p $target
|
||||
rsync -a --delete "$docs/content/" $target
|
||||
|
||||
branch=update-docs-$version
|
||||
|
||||
git -C $tmp checkout -b $branch
|
||||
git -C $tmp add -A .
|
||||
git -C $tmp commit -m "Update $version docs"
|
||||
git -C $tmp push origin $branch
|
||||
|
||||
if [ -n "$GIT_TOKEN" ]; then
|
||||
curl -u "$GIT_TOKEN:x-oauth-basic" -XPOST -d@- \
|
||||
https://api.github.com/repos/druid-io/druid-io.github.io/pulls <<EOF
|
||||
{
|
||||
"title" : "Update $version docs",
|
||||
"head" : "$branch",
|
||||
"base" : "master"
|
||||
}
|
||||
EOF
|
||||
|
||||
else
|
||||
echo "GitHub personal token not provided, not submitting pull request"
|
||||
echo "Please go to https://github.com/druid-io/druid-io.github.io and submit a pull request from the \`$branch\` branch"
|
||||
fi
|
||||
|
||||
git clone git@github.com:druid-io/druid-io.github.io.git ${WORKING_DIR}
|
||||
|
||||
DOC_DIR=${WORKING_DIR}/docs/${VERSION}/
|
||||
|
||||
mkdir -p ${DOC_DIR}
|
||||
cp -r ${SCRIPT_DIR}/../content/* ${DOC_DIR}
|
||||
|
||||
BRANCH=docs-${VERSION}
|
||||
|
||||
pushd ${WORKING_DIR}
|
||||
git checkout -b ${BRANCH}
|
||||
git add .
|
||||
git commit -m "Deploy ${VERSION} docs"
|
||||
git push origin ${BRANCH}
|
||||
popd
|
||||
|
||||
rm -rf ${WORKING_DIR}
|
||||
rm -rf $tmp
|
||||
|
|
|
@ -1,48 +1,10 @@
|
|||
|
||||
<!-- Start page_footer include -->
|
||||
<div class="container">
|
||||
<footer>
|
||||
<div class="container">
|
||||
<hr>
|
||||
<div class="row">
|
||||
<div class="col-md-4">
|
||||
<address>
|
||||
<strong>CONTACT US</strong>
|
||||
<a href="mailto:info@druid.io">info@druid.io</a>
|
||||
</address>
|
||||
<address>
|
||||
<div class="soc">
|
||||
<a href="https://twitter.com/druidio"></a>
|
||||
<a href="https://github.com/metamx/druid" class="github"></a>
|
||||
<a href="http://www.meetup.com/Open-Druid/" class="meet"></a>
|
||||
<a href="http://druid.io/feed/" class="rss" target="_blank"></a>
|
||||
</div>
|
||||
</div>
|
||||
<ul class="col-md-4 list-unstyled">
|
||||
<li><a href="/"><strong>DRUID</strong></a></li>
|
||||
<li><a href="/druid.html">What is Druid?</a></li>
|
||||
<li><a href="/downloads.html">Downloads</a></li>
|
||||
<li><a target="_blank" href="Home.html">Documentation</a></li>
|
||||
</ul>
|
||||
<ul class="col-md-4 list-unstyled">
|
||||
<li><a href="/community.html"><strong>SUPPORT</strong></a></li>
|
||||
<li><a href="/community.html">Community</a></li>
|
||||
<li><a href="/faq.html">FAQ</a></li>
|
||||
<li><a href="/licensing.html">Licensing</a></li>
|
||||
<li><a href="/blog"><strong>BLOG</strong></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
</footer>
|
||||
</div>
|
||||
<script type="text/javascript">
|
||||
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
||||
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
||||
</script>
|
||||
<script type="text/javascript">
|
||||
try {
|
||||
var pageTracker = _gat._getTracker("UA-40280432-1");
|
||||
pageTracker._trackPageview();
|
||||
} catch(err) {}
|
||||
<script src="http://code.jquery.com/jquery.min.js"></script>
|
||||
</script>
|
||||
<!-- stop page_footer include -->
|
||||
|
|
|
@ -1,25 +1,16 @@
|
|||
|
||||
<!-- Start page_header include -->
|
||||
<div class="navbar navbar-inverse navbar-static-top">
|
||||
<div class="container druid-navbar">
|
||||
<div class="navbar navbar-inverse navbar-static-top druid-nav">
|
||||
<div class="container">
|
||||
<div class="navbar-header">
|
||||
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
</button>
|
||||
<a class="navbar-brand" href="/">Druid</a>
|
||||
</div>
|
||||
<div class="navbar-collapse collapse">
|
||||
<ul class="nav navbar-nav">
|
||||
<li {% if page.id == 'home' %} class="active"{% endif %}><a href="/">Home</a></li>
|
||||
<li {% if page.sectionid == 'druid' %} class="active"{% endif %}><a href="/druid.html">What is Druid?</a></li>
|
||||
<li {% if page.sectionid == 'downloads' %} class="active"{% endif %}><a href="/downloads.html">Downloads</a></li>
|
||||
<li {% if page.sectionid == 'docs' %} class="active"{% endif %}><a href="https://github.com/metamx/druid/wiki">Documentation</a></li>
|
||||
<li {% if page.sectionid == 'community' %} class="active"{% endif %}><a href="/community.html">Community</a></li>
|
||||
<li {% if page.sectionid == 'faq' %} class="active"{% endif %}><a href="/faq.html">FAQ</a></li>
|
||||
<li {% if page.sectionid == 'blog' %} class="active"{% endif %}><a href="/blog">Blog</a></li>
|
||||
<li class="divider"></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -5,15 +5,13 @@
|
|||
|
||||
<title>Druid | {{page.title}}</title>
|
||||
|
||||
<!-- Latest compiled and minified CSS -->
|
||||
<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.css">
|
||||
<link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css">
|
||||
|
||||
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400,600,300,700,800' rel='stylesheet' type='text/css'>
|
||||
<link href='http://fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600' rel='stylesheet' type='text/css'>
|
||||
<link rel="alternate" type="application/atom+xml" href="http://druid.io/feed">
|
||||
|
||||
<link rel="stylesheet" href="//druid.io/css/main.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/header.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/footer.css">
|
||||
<link rel="stylesheet" href="//druid.io/css/syntax.css">
|
||||
|
||||
|
||||
<link rel="stylesheet" href="//druid.io/css/docs.css">
|
||||
|
|
|
@ -2,35 +2,30 @@
|
|||
<html lang="en">
|
||||
<head>
|
||||
{% include site_head.html %}
|
||||
|
||||
<link rel="stylesheet" href="css/docs.css">
|
||||
</head>
|
||||
<body>
|
||||
{% include page_header.html %}
|
||||
|
||||
<div class="container">
|
||||
<div class="page-header">
|
||||
<h1>Documentation</h1>
|
||||
<div class="druid-header">
|
||||
<div class="container">
|
||||
<h1>Documentation</h1>
|
||||
<h4></h4>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-3 toc" id="toc">
|
||||
</div>
|
||||
|
||||
<div class="col-md-9 doc-content">
|
||||
{{ content }}
|
||||
</div>
|
||||
<div class="col-md-3 toc" id="toc">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% include page_footer.html %}
|
||||
<script src="http://code.jquery.com/jquery.js"></script>
|
||||
<script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#toc").load("toc.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<script>$(function() { $(".toc").load("toc.html"); });</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
|
|
@ -155,8 +155,8 @@ Determine the number of distinct are assigned to.
|
|||
|
||||
### HyperUnique aggregator
|
||||
|
||||
Uses [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) to compute the estimated cardinality of a dimension that has been aggregated as a hyperUnique metric at indexing time.
|
||||
Uses [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) to compute the estimated cardinality of a dimension that has been aggregated as a "hyperUnique" metric at indexing time.
|
||||
|
||||
```json
|
||||
{ "type" : "hyperUnique", "name" : <output_name>, "fieldName" : <metric_name> }
|
||||
```
|
||||
```
|
|
@ -0,0 +1,93 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
### ApproxHistogram aggregator
|
||||
|
||||
This aggregator is based on [http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf) to compute approximate histograms.
|
||||
|
||||
To use this feature, an "approxHistogram" aggregator must be included at indexing time. The ingestion aggregator can only apply to numeric values. To query for results, an "approxHistogramFold" aggregator must be included in the query.
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "approxHistogram(ingestion), approxHistogramFold(query)",
|
||||
"name" : <output_name>,
|
||||
"fieldName" : <metric_name>,
|
||||
"resolution" : <integer>,
|
||||
"numBuckets" : <integer>,
|
||||
"lowerLimit" : <float>,
|
||||
"upperLimit" : <float>
|
||||
}
|
||||
```
|
||||
|
||||
|Property|Description|Default|
|
||||
|--------|-----------|-------|
|
||||
|`resolution`|Number of centroids (data points) to store. The higher the resolution, the more accurate results are, but the slower computation will be.|50|
|
||||
|`numBuckets`|Number of output buckets for the resulting histogram.|7|
|
||||
|`lowerLimit`/`upperLimit`|Restrict the approximation to the given range. The values outside this range will be aggregated into two centroids. Counts of values outside this range are still maintained. |-INF/+INF|
|
||||
|
||||
|
||||
### Approximate Histogram post-aggregators
|
||||
|
||||
Post-aggregators used to transform opaque approximate histogram objects
|
||||
into actual histogram representations, and to compute various distribution metrics.
|
||||
|
||||
#### equal buckets post-aggregator
|
||||
|
||||
Computes a visual representation of the approximate histogram with a given number of equal-sized bins
|
||||
|
||||
```json
|
||||
{ "type" : "equalBuckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"numBuckets" : <count> }
|
||||
```
|
||||
|
||||
#### buckets post-aggregator
|
||||
|
||||
Computes a visual representation given an initial breakpoint, offset, and a bucket size.
|
||||
|
||||
```json
|
||||
{ "type" : "buckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"bucketSize" : <bucket_size>, "offset" : <offset> }
|
||||
```
|
||||
|
||||
#### custom buckets post-aggregator
|
||||
|
||||
Computes a visual representation of the approximate histogram with bins laid out according to the given breaks
|
||||
|
||||
```json
|
||||
{ "type" : "customBuckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"breaks" : [ <value>, <value>, ... ] }
|
||||
```
|
||||
|
||||
#### min post-aggregator
|
||||
|
||||
Returns the minimum value of the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "min", "name" : <output_name>, "fieldName" : <aggregator_name> }
|
||||
```
|
||||
|
||||
#### max post-aggregator
|
||||
|
||||
Returns the maximum value of the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "max", "name" : <output_name>, "fieldName" : <aggregator_name> }
|
||||
```
|
||||
|
||||
#### quantile post-aggregator
|
||||
|
||||
Computes a single quantile based on the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "quantile", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"probability" : <quantile> }
|
||||
```
|
||||
|
||||
#### quantiles post-aggregator
|
||||
|
||||
Computes an array of quantiles based on the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "quantiles", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"probabilities" : [ <quantile>, <quantile>, ... ] }
|
||||
```
|
|
@ -83,12 +83,15 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
|
|||
"type": "hashed"
|
||||
"targetPartitionSize": 5000000
|
||||
},
|
||||
"metadataUpdateSpec": {
|
||||
"updaterJobSpec": {
|
||||
"type": "db",
|
||||
"connectURI": "jdbc:mysql:\/\/localhost:7980\/test_db",
|
||||
"user": "username",
|
||||
"password": "passmeup",
|
||||
"segmentTable": "segments"
|
||||
},
|
||||
"jobProperties": {
|
||||
"mapreduce.job.queuename": "default"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
@ -109,6 +112,7 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
|
|||
|leaveIntermediate|leave behind files in the workingPath when job completes or fails (debugging tool).|no|
|
||||
|partitionsSpec|a specification of how to partition each time bucket into segments, absence of this property means no partitioning will occur.|no|
|
||||
|metadataUpdateSpec|a specification of how to update the metadata for the druid cluster these segments belong to.|yes|
|
||||
|jobProperties|a map of properties to add to the Hadoop job configuration.|no|
|
||||
|
||||
### Path specification
|
||||
|
||||
|
@ -268,7 +272,7 @@ The schema of the Hadoop Index Task contains a task "type" and a Hadoop Index Co
|
|||
|config|A Hadoop Index Config (see above).|yes|
|
||||
|hadoopCoordinates|The Maven `<groupId>:<artifactId>:<version>` of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no|
|
||||
|
||||
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally.
|
||||
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
|
||||
|
||||
To run the task:
|
||||
|
||||
|
|
|
@ -12,15 +12,12 @@ You can provision individual servers, loading Druid onto each machine (or buildi
|
|||
|
||||
[Apache Whirr](http://whirr.apache.org/) is a set of libraries for launching cloud services. For Druid, Whirr serves as an easy way to launch a cluster in Amazon AWS by using simple commands and configuration files (called *recipes*).
|
||||
|
||||
**NOTE:** Whirr will install Druid 0.6.121. Also, it doesn't work with JDK1.7.0_55. JDK1.7.0_45 recommended.
|
||||
**NOTE:** Whirr will install Druid 0.6.115 (an older version of Druid). Also, it doesn't work with JDK1.7.0_55. JDK1.7.0_45 recommended.
|
||||
|
||||
You'll need an AWS account, S3 Bucket and an EC2 key pair from that account so that Whirr can connect to the cloud via the EC2 API. If you haven't generated a key pair, see the [AWS documentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) or see this [Whirr FAQ](http://whirr.apache.org/faq.html#how-do-i-find-my-cloud-credentials).
|
||||
|
||||
|
||||
### Installing Whirr
|
||||
You must use a version of Whirr that includes and supports a Druid recipe. You can do it so in one of two ways:
|
||||
|
||||
#### Build the Following Version of Whirr
|
||||
### Install Whirr
|
||||
Clone the code from [https://github.com/druid-io/whirr](https://github.com/druid-io/whirr) and build Whirr:
|
||||
|
||||
git clone git@github.com:druid-io/whirr.git
|
||||
|
@ -28,23 +25,23 @@ Clone the code from [https://github.com/druid-io/whirr](https://github.com/druid
|
|||
git checkout trunk
|
||||
mvn clean install -Dmaven.test.failure.ignore=true
|
||||
|
||||
#### Build the Latest Version of Whirr
|
||||
Clone the code from the Whirr repository:
|
||||
In order to run the test below, you'll also need two files that are available only from a [standard install of Druid](http://druid.io/downloads.html) or the [Druid repo](https://github.com/metamx/druid/tree/master/examples/bin/examples):
|
||||
|
||||
* `druid/examples/bin/examples/wikipedia/wikipedia_realtime.spec`
|
||||
* `druid/examples/bin/examples/indexing/wikipedia_realtime_task.json`
|
||||
|
||||
git clone git://git.apache.org/whirr.git
|
||||
|
||||
Then run `mvn install` from the root directory.
|
||||
|
||||
### Configure Whirr
|
||||
The Whirr recipe for Druid is the configuration file `$WHIRR_HOME/recipies/druid.properties`. You can edit this file to suit your needs -- it is annotated and self-explanatory. Here are some hints about that file:
|
||||
The Whirr recipe for Druid is the configuration file `$WHIRR_HOME/recipies/druid.properties`. You can edit this file to suit your needs; it is annotated and self-explanatory. Here are some hints about that file:
|
||||
|
||||
* Set `whirr.location-id` to a specific AWS region (e.g., us-east-1) if desired, else one will be chosen for you.
|
||||
* Set `whirr.location-id` to a specific AWS region if desired. If this is left blank, a region is chosen for you. The default value is `us-east-1`.
|
||||
* You can choose the hardware by setting `whirr.hardware-id` to a specific instance type (e.g., m1.large). The default `druid.properties` uses m3.2xlarge (broker, historical, middle manager), m1.xlarge (coordinator, overlord), and m1.small (zookeeper, mysql).
|
||||
* If you don't choose an image via `whirr.image-id` (image must be compatible with hardware), you'll get plain vanilla Linux. Default druid.properties uses ami-018c9568 (Ubuntu 12.04).
|
||||
* SSH keys (not password protected) must exist for the local user. If they are in the default locations, `${sys:user.home}/.ssh/id_rsa` and `${sys:user.home}/.ssh/id_rsa.pub`, Whirr will find them. Otherwise, you'll have to specify them with `whirr.private-key-file` and `whirr.public-key-file`.
|
||||
* Be sure to specify the absolute path of the Druid realtime spec file `realtime.spec` in `whirr.druid.realtime.spec.path`.
|
||||
* Also make sure to specify the correct S3 bucket. Otherwise the cluster won't be able to process tasks.
|
||||
* Two Druid cluster templates (see `whirr.instance-templates`) are provided: a small cluster running on a single EC2 instance, and a larger cluster running on multiple instances.
|
||||
* You must specify the path to an S3 bucket. Otherwise the cluster won't be able to process tasks.
|
||||
* To successfully submit the test task below, you'll need to specify the location of the `wikipedia_realtime.spec` in the property `whirr.druid.realtime.spec.path`.
|
||||
* Specify the Druid version only if [Druid extensions](Modules.html) are being used.
|
||||
|
||||
The following AWS information must be set in `druid.properties`, as environment variables, or in the file `$WHIRR_HOME/conf/credentials`:
|
||||
|
||||
|
@ -54,7 +51,7 @@ The following AWS information must be set in `druid.properties`, as environment
|
|||
|
||||
How to get the IDENTITY and CREDENTIAL keys is discussed above.
|
||||
|
||||
In order to configure each node, you can edit `services/druid/src/main/resources/functions/start_druid.sh` for JVM configuration and `services/druid/src/main/resources/functions/configure_[NODE_NAME].sh` for specific node configuration. For more information on configuration, read the Druid documentations about it (http://druid.io/docs/0.6.116/Configuration.html).
|
||||
In order to configure each node, you can edit `services/druid/src/main/resources/functions/start_druid.sh` for JVM configuration and `services/druid/src/main/resources/functions/configure_[NODE_NAME].sh` for specific node configuration. For more information on configuration, see the [Druid configuration documentation](Configuration.html).
|
||||
|
||||
### Start a Test Cluster With Whirr
|
||||
Run the following command:
|
||||
|
@ -66,14 +63,14 @@ If Whirr starts without any errors, you should see the following message:
|
|||
|
||||
Running on provider aws-ec2 using identity <your-aws-id-here>
|
||||
|
||||
You can then use the EC2 dashboard to locate the instance and confirm that it has started up.
|
||||
You can then use the EC2 dashboard to locate the instances and confirm that they have started up.
|
||||
|
||||
If both the instance and the Druid cluster launch successfully, a few minutes later other messages to STDOUT should follow with information returned from EC2, including the instance ID:
|
||||
If both the instances and the Druid cluster launch successfully, a few minutes later other messages to STDOUT should follow with information returned from EC2, including the instance ID:
|
||||
|
||||
Started cluster of 1 instances
|
||||
Started cluster of 8 instances
|
||||
Cluster{instances=[Instance{roles=[zookeeper, druid-mysql, druid-coordinator, druid-broker, druid-historical, druid-realtime], publicIp= ...
|
||||
|
||||
The final message will contain login information for the instance.
|
||||
The final message will contain login information for the instances.
|
||||
|
||||
Note that Whirr will return an exception if any of the nodes fail to launch, and the cluster will be destroyed. To destroy the cluster manually, run the following command:
|
||||
|
||||
|
@ -85,9 +82,11 @@ Note that Whirr will return an exception if any of the nodes fail to launch, and
|
|||
Now you can run an indexing task and a simple query to see if all the nodes have launched correctly. We are going to use a Wikipedia example again. For a realtime indexing task, run the following command:
|
||||
|
||||
```bash
|
||||
curl -X 'POST' -H 'Content-Type:application/json' -d @#{YOUR_DRUID_DIRECTORY}/examples/indexing/wikipedia_realtime_task.json #{OVERLORD_PUBLIC_IP_ADDR}:#{PORT}/druid/indexer/v1/task
|
||||
curl -X 'POST' -H 'Content-Type:application/json' -d @#{PATH_TO}/wikipedia_realtime_task.json #{OVERLORD_PUBLIC_IP_ADDR}:#{PORT}/druid/indexer/v1/task
|
||||
```
|
||||
Issuing the request should return a task ID.
|
||||
where OVERLORD_PUBLIC_IP_ADDR should be available from the EC2 information logged to STDOUT, the Overlord port is 8080 by default, and `wikipedia_realtime_task.json` is discussed above.
|
||||
|
||||
Issuing this request should return a task ID.
|
||||
|
||||
To check the state of the overlord, open up your browser and go to `#{OVERLORD_PUBLIC_IP_ADDR}:#{PORT}/console.html`.
|
||||
|
||||
|
|
|
@ -12,8 +12,12 @@ The broker module uses several of the default modules in [Configuration](Configu
|
|||
|
||||
|Property|Possible Values|Description|Default|
|
||||
|--------|---------------|-----------|-------|
|
||||
|`druid.broker.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|
||||
|`druid.broker.balancer.type`|`random`, `connectionCount`|Determines how the broker balances connections to historical nodes. `random` chooses a node randomly; `connectionCount` picks the node with the fewest active connections.|`random`|
|
||||
|`druid.broker.select.tier`|`highestPriority`, `lowestPriority`, `custom`|If segments are cross-replicated across tiers in a cluster, you can tell the broker to prefer to select segments in a tier with a certain priority.|`highestPriority`|
|
||||
|`druid.broker.select.tier.custom.priorities`|`An array of integer priorities.`|Select servers in tiers with a custom priority list.|None|
|
||||
|`druid.broker.cache.type`|`local`, `memcached`|The type of cache to use for queries.|`local`|
|
||||
|`druid.broker.cache.unCacheable`|All druid query types|All query types to not cache.|["groupBy", "select"]|
|
||||
|
||||
|
||||
#### Local Cache
|
||||
|
||||
|
|
|
@ -154,6 +154,7 @@ Druid storage nodes maintain information about segments they have already downlo
|
|||
|--------|-----------|-------|
|
||||
|`druid.segmentCache.locations`|Segments assigned to a Historical node are first stored on the local file system (in a disk cache) and then served by the Historical node. These locations define where that local cache resides. | none (no caching) |
|
||||
|`druid.segmentCache.deleteOnRemove`|Delete segment files from cache once a node is no longer serving a segment.|true|
|
||||
|`druid.segmentCache.dropSegmentDelayMillis`|How long a node delays before completely dropping segment.|5 minutes|
|
||||
|`druid.segmentCache.infoDir`|Historical nodes keep track of the segments they are serving so that when the process is restarted they can reload the same segments without waiting for the Coordinator to reassign. This path defines where this metadata is kept. Directory will be created if needed.|${first_location}/info_dir|
|
||||
|
||||
### Jetty Server Module
|
||||
|
|
|
@ -51,7 +51,7 @@ Issuing a GET request at the same URL will return the spec that is currently in
|
|||
|
||||
|Property|Description|Default|
|
||||
|--------|-----------|-------|
|
||||
|`millisToWaitBeforeDeleting`|How long does the coordinator need to be active before it can start deleting segments.|90000 (15 mins)|
|
||||
|`millisToWaitBeforeDeleting`|How long does the coordinator need to be active before it can start removing (marking unused) segments in metadata storage.|900000 (15 mins)|
|
||||
|`mergeBytesLimit`|The maximum number of bytes to merge (for segments).|524288000L|
|
||||
|`mergeSegmentsLimit`|The maximum number of segments that can be in a single [merge task](Tasks.html).|100|
|
||||
|`maxSegmentsToMove`|The maximum number of segments that can be moved at any given time.|5|
|
||||
|
|
|
@ -4,7 +4,7 @@ layout: doc_page
|
|||
Data Formats for Ingestion
|
||||
==========================
|
||||
|
||||
Druid can ingest data in JSON, CSV, or TSV. While most examples in the documentation use data in JSON format, it is not difficult to configure Druid to ingest CSV or TSV data.
|
||||
Druid can ingest data in JSON, CSV, or custom delimited data such as TSV. While most examples in the documentation use data in JSON format, it is not difficult to configure Druid to ingest CSV or other delimited data.
|
||||
|
||||
## Formatting the Data
|
||||
The following are three samples of the data used in the [Wikipedia example](Tutorial:-Loading-Your-Data-Part-1.html).
|
||||
|
@ -41,8 +41,8 @@ _TSV_
|
|||
|
||||
Note that the CSV and TSV data do not contain column heads. This becomes important when you specify the data for ingesting.
|
||||
|
||||
## Configuring Ingestion For the Indexing Service
|
||||
If you use the [indexing service](Indexing-Service.html) for ingesting the data, a [task](Tasks.html) must be configured and submitted. Tasks are configured with a JSON object which, among other things, specifies the data source and type. In the Wikipedia example, JSON data was read from a local file. The task spec contains a firehose element to specify this:
|
||||
## Configuration
|
||||
All forms of Druid ingestion require some form of schema object. An example blob of json pertaining to the data format may look something like this:
|
||||
|
||||
```json
|
||||
"firehose" : {
|
||||
|
|
|
@ -19,13 +19,13 @@ Clone Druid and build it:
|
|||
git clone https://github.com/metamx/druid.git druid
|
||||
cd druid
|
||||
git fetch --tags
|
||||
git checkout druid-0.6.121
|
||||
git checkout druid-0.6.138
|
||||
./build.sh
|
||||
```
|
||||
|
||||
### Downloading the DSK (Druid Standalone Kit)
|
||||
|
||||
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.121-bin.tar.gz) a stand-alone tarball and run it:
|
||||
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.138-bin.tar.gz) a stand-alone tarball and run it:
|
||||
|
||||
``` bash
|
||||
tar -xzf druid-services-0.X.X-bin.tar.gz
|
||||
|
|
|
@ -36,10 +36,40 @@ See [Examples](Examples.html). This firehose connects directly to the twitter sp
|
|||
|
||||
See [Examples](Examples.html). This firehose creates a stream of random numbers.
|
||||
|
||||
#### RabbitMqFirehouse
|
||||
#### RabbitMqFirehose
|
||||
|
||||
This firehose ingests events from a defined rabbit-mq queue.
|
||||
|
||||
#### IngestSegmentFirehose
|
||||
|
||||
This Firehose can be used to read the data from existing druid segments.
|
||||
It can be used to ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment.
|
||||
A sample ingest firehose spec is shown below -
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "ingestSegment",
|
||||
"dataSource" : "wikipedia",
|
||||
"interval" : "2013-01-01/2013-01-02",
|
||||
"dimensions":[],
|
||||
"metrics":[]
|
||||
}
|
||||
```
|
||||
|
||||
|property|description|required?|
|
||||
|--------|-----------|---------|
|
||||
|type|ingestSegment. Type of firehose|yes|
|
||||
|dataSource|A String defining the data source to fetch rows from, very similar to a table in a relational database|yes|
|
||||
|interval|A String representing ISO-8601 Interval. This defines the time range to fetch the data over.|yes|
|
||||
|dimensions|The list of dimensions to select. If left empty, all dimensions are selected.|no|
|
||||
|metrics|The list of metrics to select. If left empty, all metrics are returned.|no|
|
||||
|filter| See [Filters](Filters.html)|no|
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Parsing Data
|
||||
------------
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ The following configs only apply if the overlord is running in remote mode:
|
|||
|Property|Description|Default|
|
||||
|--------|-----------|-------|
|
||||
|`druid.indexer.runner.taskAssignmentTimeout`|How long to wait after a task has been assigned to a middle manager before throwing an error.|PT5M|
|
||||
|`druid.indexer.runner.minWorkerVersion`|The minimum middle manager version to send tasks to. |none|
|
||||
|`druid.indexer.runner.minWorkerVersion`|The minimum middle manager version to send tasks to. |"0"|
|
||||
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the overlord should expect middle managers to compress Znodes.|false|
|
||||
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
|
||||
|
||||
|
@ -69,10 +69,9 @@ A sample worker setup spec is shown below:
|
|||
"keyName":"keyName"
|
||||
},
|
||||
"userData":{
|
||||
"classType":"galaxy",
|
||||
"env":"druid",
|
||||
"version":"druid_version",
|
||||
"type":"sample_cluster/worker"
|
||||
"impl":"string",
|
||||
"data":"version=:VERSION:",
|
||||
"versionReplacementString":":VERSION:"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
@ -81,8 +80,7 @@ Issuing a GET request at the same URL will return the current worker setup spec
|
|||
|
||||
|Property|Description|Default|
|
||||
|--------|-----------|-------|
|
||||
|`minVersion`|The coordinator only assigns tasks to workers with a version greater than the minVersion. If this is not specified, the minVersion will be the same as the coordinator version.|none|
|
||||
|`minNumWorkers`|The minimum number of workers that can be in the cluster at any given time.|0|
|
||||
|`maxNumWorkers`|The maximum number of workers that can be in the cluster at any given time.|0|
|
||||
|`nodeData`|A JSON object that contains metadata about new nodes to create.|none|
|
||||
|`userData`|A JSON object that contains metadata about how the node should register itself on startup. This data is sent with node creation requests.|none|
|
||||
|`nodeData`|A JSON object that describes how to launch new nodes. Currently, only EC2 is supported.|none; required|
|
||||
|`userData`|A JSON object that describes how to configure new nodes. Currently, only EC2 is supported. If you have set druid.indexer.autoscale.workerVersion, this must have a versionReplacementString. Otherwise, a versionReplacementString is not necessary.|none; optional|
|
||||
|
|
|
@ -37,6 +37,11 @@ You can check the coordinator console located at `<COORDINATOR_IP>:<PORT>/cluste
|
|||
|
||||
You can check `<BROKER_IP>:<PORT>/druid/v2/datasources/<YOUR_DATASOURCE>?interval=0/3000` for the dimensions and metrics that have been created for your datasource. Make sure that the name of each aggregator you use in your query matches one of these metrics. Also make sure that the query interval you specify matches a valid time range where data exists. Note: the broker endpoint will only return valid results on historical segments.
|
||||
|
||||
## How can I Reindex existing data in Druid with schema changes?
|
||||
|
||||
You can use IngestSegmentFirehose with index task to ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment.
|
||||
See [Firehose](Firehose.html) for more details on IngestSegmentFirehose.
|
||||
|
||||
## More information
|
||||
|
||||
Getting data into Druid can definitely be difficult for first time users. Please don't hesitate to ask questions in our IRC channel or on our [google groups page](https://groups.google.com/forum/#!forum/druid-development).
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Ingesting from Kafka 8
|
||||
----------------------
|
||||
|
||||
The previous examples are for Kafka 7. To support Kafka 8, a couple changes need to be made:
|
||||
|
||||
- Update realtime node's configs for Kafka 8 extensions
|
||||
- e.g.
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.138",...]`
|
||||
- becomes
|
||||
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.138",...]`
|
||||
- Update realtime task config for changed keys
|
||||
- `firehose.type`, `plumber.rejectionPolicyFactory`, and all of `firehose.consumerProps` changes.
|
||||
|
||||
```json
|
||||
|
||||
"firehose" : {
|
||||
"type" : "kafka-0.8",
|
||||
"consumerProps" : {
|
||||
"zookeeper.connect": "localhost:2181",
|
||||
"zookeeper.connection.timeout.ms": "15000",
|
||||
"zookeeper.session.timeout.ms": "15000",
|
||||
"zookeeper.sync.time.ms": "5000",
|
||||
"group.id": "topic-pixel-local",
|
||||
"fetch.message.max.bytes": "1048586",
|
||||
"auto.offset.reset": "largest",
|
||||
"auto.commit.enable": "false"
|
||||
},
|
||||
"feed" : "druidtest",
|
||||
"parser" : {
|
||||
"timestampSpec" : {
|
||||
"column" : "utcdt",
|
||||
"format" : "iso"
|
||||
},
|
||||
"data" : {
|
||||
"format" : "json"
|
||||
},
|
||||
"dimensionExclusions" : [
|
||||
"wp"
|
||||
]
|
||||
}
|
||||
},
|
||||
"plumber" : {
|
||||
"type" : "realtime",
|
||||
"windowPeriod" : "PT10m",
|
||||
"segmentGranularity":"hour",
|
||||
"basePersistDirectory" : "/tmp/realtime/basePersist",
|
||||
"rejectionPolicyFactory": {
|
||||
"type": "messageTime"
|
||||
}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,39 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Logging
|
||||
==========================
|
||||
|
||||
Druid nodes will emit logs that are useful for debugging to the console. Druid nodes also emit periodic metrics about their state. For more about metrics, see [Configuration](Configuration.html). Metric logs are printed to the console by default, and can be disabled with `-Ddruid.emitter.logging.logLevel=debug`.
|
||||
|
||||
Druid uses [log4j](http://logging.apache.org/log4j/2.x/) for logging, and console logs can be configured by adding a log4j.xml file. Add this xml file to your classpath if you want to override default Druid log configuration.
|
||||
|
||||
An example log4j.xml file is shown below:
|
||||
|
||||
```
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||
|
||||
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||
|
||||
<appender name="ConsoleAppender" class="org.apache.log4j.ConsoleAppender">
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{ISO8601} %-5p [%t] %c - %m%n"/>
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<!-- ServerView-related stuff is way too chatty -->
|
||||
<logger name="io.druid.client.BatchServerInventoryView">
|
||||
<level value="warn"/>
|
||||
</logger>
|
||||
<logger name="io.druid.curator.inventory.CuratorInventoryManager">
|
||||
<level value="warn"/>
|
||||
</logger>
|
||||
|
||||
<root>
|
||||
<priority value="info" />
|
||||
<appender-ref ref="ConsoleAppender"/>
|
||||
</root>
|
||||
|
||||
</log4j:configuration>
|
||||
```
|
|
@ -48,10 +48,9 @@ Middle managers pass their configurations down to their child peons. The middle
|
|||
|`druid.worker.capacity`|Maximum number of tasks the middle manager can accept.|Number of available processors - 1|
|
||||
|`druid.indexer.runner.compressZnodes`|Indicates whether or not the middle managers should compress Znodes.|false|
|
||||
|`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288|
|
||||
|`druid.indexer.runner.taskDir`|Temporary intermediate directory used during task execution.|/tmp/persistent|
|
||||
|`druid.indexer.runner.javaCommand`|Command required to execute java.|java|
|
||||
|`druid.indexer.runner.javaOpts`|-X Java options to run the peon in its own JVM.|""|
|
||||
|`druid.indexer.runner.classpath`|Java classpath for the peon.|System.getProperty("java.class.path")|
|
||||
|`druid.indexer.runner.startPort`|The port that peons begin running on.|8080|
|
||||
|`druid.indexer.runner.startPort`|The port that peons begin running on.|8081|
|
||||
|`druid.indexer.runner.allowedPrefixes`|Whitelist of prefixes for configs that can be passed down to child peons.|"com.metamx", "druid", "io.druid", "user.timezone","file.encoding"|
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Working with different versions of Hadoop may require a bit of extra work for the time being. We will make changes to support different Hadoop versions in the near future. If you have problems outside of these instructions, please feel free to contact us in IRC or on the [forum](https://groups.google.com/forum/#!forum/druid-development).
|
||||
|
||||
Working with Hadoop 2.x
|
||||
-----------------------
|
||||
The default version of Hadoop bundled with Druid is 2.3. This should work out of the box.
|
||||
|
||||
To override the default Hadoop version, both the Hadoop Index Task and the standalone Hadoop indexer support the parameter `hadoopDependencyCoordinates`. You can pass another set of Hadoop coordinates through this parameter (e.g. You can specify coordinates for Hadoop 2.4.0 as `["org.apache.hadoop:hadoop-client:2.4.0"]`).
|
||||
|
||||
The Hadoop Index Task takes this parameter as part of the task JSON and the standalone Hadoop indexer takes this parameter as a command line argument.
|
||||
|
||||
|
||||
Working with Hadoop 1.x and older
|
||||
---------------------------------
|
||||
We recommend recompiling Druid with your particular version of Hadoop by changing the dependencies in Druid's pom.xml files. Make sure to also either override the default `hadoopDependencyCoordinates` in the code or pass your Hadoop version in as part of indexing.
|
|
@ -22,6 +22,7 @@ Additional peon configs include:
|
|||
|`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|/tmp/persistent/tasks|
|
||||
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|/tmp/druid-indexing|
|
||||
|`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. Used for indexing generating tasks.|50000|
|
||||
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.3.0|
|
||||
|`druid.indexer.task.chathandler.type`|Choices are "noop" and "announce". Certain tasks will use service discovery to announce an HTTP endpoint that events can be posted to.|noop|
|
||||
|
||||
If the peon is running in remote mode, there must be an overlord up and running. Running peons in remote mode require the following configurations:
|
||||
|
|
|
@ -64,7 +64,7 @@ Example JavaScript aggregator:
|
|||
"function": "function(delta, total) { return 100 * Math.abs(delta) / total; }"
|
||||
}
|
||||
```
|
||||
### `hyperUniqueCardinality` post-aggregator
|
||||
### HyperUnique Cardinality post-aggregator
|
||||
|
||||
The hyperUniqueCardinality post aggregator is used to wrap a hyperUnique object such that it can be used in post aggregations.
|
||||
|
||||
|
@ -90,8 +90,7 @@ It can be used in a sample calculation as so:
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
### Example Usage
|
||||
#### Example Usage
|
||||
|
||||
In this example, let’s calculate a simple percentage using post aggregators. Let’s imagine our data set has a metric called "total".
|
||||
|
||||
|
@ -122,5 +121,4 @@ The format of the query JSON is as follows:
|
|||
}
|
||||
...
|
||||
}
|
||||
|
||||
```
|
||||
```
|
||||
|
|
|
@ -4,6 +4,8 @@ layout: doc_page
|
|||
Production Cluster Configuration
|
||||
================================
|
||||
|
||||
__This configuration is an example of what a production cluster could look like. Many other hardware combinations are possible! Cheaper hardware is absolutely possible.__
|
||||
|
||||
This production Druid cluster assumes that MySQL and Zookeeper are already set up. The deep storage that is used for examples is S3 and memcached is used for a distributed cache.
|
||||
|
||||
The nodes that respond to queries (Historical, Broker, and Middle manager nodes) will use as many cores as are available, depending on usage, so it is best to keep these on dedicated machines. The upper limit of effectively utilized cores is not well characterized yet and would depend on types of queries, query load, and the schema. Historical daemons should have a heap size of at least 1GB per core for normal usage, but could be squeezed into a smaller heap for testing. Since in-memory caching is essential for good performance, even more RAM is better. Broker nodes will use RAM for caching, so they do more than just route queries. SSDs are highly recommended for Historical nodes when not all data is loaded in available memory.
|
||||
|
@ -55,7 +57,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/overlord
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
@ -137,7 +139,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/middlemanager
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121","io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138","io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
@ -153,9 +155,8 @@ druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
|
|||
druid.indexer.logs.s3Prefix=prod/logs/v1
|
||||
|
||||
# Dedicate more resources to peons
|
||||
druid.indexer.runner.javaOpts=-server -Xmx6g -Xms6g -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
|
||||
druid.indexer.runner.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.runner.javaOpts=-server -Xmx3g -XX:+UseG1GC -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
|
||||
druid.indexer.task.baseTaskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.chathandler.type=announce
|
||||
|
||||
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
||||
|
@ -285,7 +286,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/historical
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
|
|
@ -1,12 +1,17 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
|
||||
Querying
|
||||
========
|
||||
|
||||
Queries are made using an HTTP REST style request to a [Broker](Broker.html), [Historical](Historical.html), or [Realtime](Realtime.html) node. The query is expressed in JSON and each of these node types expose the same REST query interface.
|
||||
Queries are made using an HTTP REST style request to a [Broker](Broker.html),
|
||||
[Historical](Historical.html), or [Realtime](Realtime.html) node. The
|
||||
query is expressed in JSON and each of these node types exposes the same
|
||||
REST query interface.
|
||||
|
||||
We start by describing an example query with additional comments that mention possible variations. Query operators are also summarized in a table below.
|
||||
We start by describing an example query with additional comments that mention
|
||||
possible variations. Query operators are also summarized in a table below.
|
||||
|
||||
Example Query "rand"
|
||||
--------------------
|
||||
|
@ -108,21 +113,52 @@ Query Operators
|
|||
|
||||
The following table summarizes query properties.
|
||||
|
||||
|query types|property|description|required?|
|
||||
|-----------|--------|-----------|---------|
|
||||
|timeseries, groupBy, search, timeBoundary|dataSource|query is applied to this data source|yes|
|
||||
|timeseries, groupBy, search|intervals|range of time series to include in query|yes|
|
||||
|timeseries, groupBy, search, timeBoundary|context|This is a key-value map that can allow the query to alter some of the behavior of a query. It is primarily used for debugging, for example if you include `"bySegment":true` in the map, you will get results associated with the data segment they came from.|no|
|
||||
|timeseries, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|
||||
|timeseries, groupBy, search|granularity|the timestamp granularity to bucket results into (i.e. "hour"). See [Granularities](Granularities.html) for more information.|no|
|
||||
Properties shared by all query types
|
||||
|
||||
|property |description|required?|
|
||||
|----------|-----------|---------|
|
||||
|dataSource|query is applied to this data source|yes|
|
||||
|intervals |range of time series to include in query|yes|
|
||||
|context |This is a key-value map used to alter some of the behavior of a query. See [Query Context](#query-context) below|no|
|
||||
|
||||
|
||||
|query type|property |description|required?|
|
||||
|----------|----------|-----------|---------|
|
||||
|timeseries, topN, groupBy, search|filter|Specifies the filter (the "WHERE" clause in SQL) for the query. See [Filters](Filters.html)|no|
|
||||
|timeseries, topN, groupBy, search|granularity|the timestamp granularity to bucket results into (i.e. "hour"). See [Granularities](Granularities.html) for more information.|no|
|
||||
|timeseries, topN, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|
||||
|timeseries, topN, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post-aggregations.html).|yes|
|
||||
|groupBy|dimensions|constrains the groupings; if empty, then one value per time granularity bucket|yes|
|
||||
|timeseries, groupBy|aggregations|aggregations that combine values in a bucket. See [Aggregations](Aggregations.html).|yes|
|
||||
|timeseries, groupBy|postAggregations|aggregations of aggregations. See [Post Aggregations](Post Aggregations.html).|yes|
|
||||
|search|limit|maximum number of results (default is 1000), a system-level maximum can also be set via `com.metamx.query.search.maxSearchLimit`|no|
|
||||
|search|searchDimensions|Dimensions to apply the search query to. If not specified, it will search through all dimensions.|no|
|
||||
|search|query|The query portion of the search query. This is essentially a predicate that specifies if something matches.|yes|
|
||||
|
||||
Additional Information about Query Types
|
||||
----------------------------------------
|
||||
Query Context
|
||||
-------------
|
||||
|
||||
[TimeseriesQuery](TimeseriesQuery.html)
|
||||
|property |default | description |
|
||||
|--------------|---------------------|----------------------|
|
||||
|timeout | `0` (no timeout) | Query timeout in milliseconds, beyond which unfinished queries will be cancelled |
|
||||
|priority | `0` | Query Priority. Queries with higher priority get precedence for computational resources.|
|
||||
|queryId | auto-generated | Unique identifier given to this query. If a query ID is set or known, this can be used to cancel the query |
|
||||
|useCache | `true` | Flag indicating whether to leverage the query cache for this query. This may be overridden in the broker or historical node configuration |
|
||||
|populateCache | `true` | Flag indicating whether to save the results of the query to the query cache. Primarily used for debugging. This may be overridden in the broker or historical node configuration |
|
||||
|bySegment | `false` | Return "by segment" results. Primarily used for debugging; setting it to `true` returns results associated with the data segment they came from |
|
||||
|finalize | `true` | Flag indicating whether to "finalize" aggregation results. Primarily used for debugging. For instance, the `hyperUnique` aggregator will return the full HyperLogLog sketch instead of the estimated cardinality when this flag is set to `false` |
|
||||
|
||||
Query Cancellation
|
||||
------------------
|
||||
|
||||
Queries can be cancelled explicitly using their unique identifier. If the
|
||||
query identifier is set at the time of query, or is otherwise known, the following
|
||||
endpoint can be used on the broker or router to cancel the query.
|
||||
|
||||
```sh
|
||||
DELETE /druid/v2/{queryId}
|
||||
```
|
||||
|
||||
For example, if the query ID is `abc123`, the query can be cancelled as follows:
|
||||
|
||||
```sh
|
||||
curl -X DELETE "http://host:port/druid/v2/abc123"
|
||||
```
|
||||
|
|
|
@ -27,7 +27,7 @@ druid.host=localhost
|
|||
druid.service=realtime
|
||||
druid.port=8083
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
@ -76,7 +76,7 @@ druid.host=#{IP_ADDR}:8080
|
|||
druid.port=8080
|
||||
druid.service=druid/prod/realtime
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121","io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138","io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
|
||||
|
||||
Rolling Updates
|
||||
===============
|
||||
|
||||
For rolling Druid cluster updates with no downtime, we recommend updating Druid nodes in the following order:
|
||||
|
||||
1. Historical Nodes
|
||||
2. Indexing Service/Real-time Nodes
|
||||
3. Broker Nodes
|
||||
4. Coordinator Nodes
|
||||
|
||||
## Historical Nodes
|
||||
|
||||
Historical nodes can be updated one at a time. Each historical node has a startup time to memory map all the segments it was serving before the update. The startup time typically takes a few seconds to a few minutes, depending on the hardware of the node. As long as each historical node is updated with a sufficient delay (greater than the time required to start a single node), you can perform a rolling update of the entire historical cluster.
|
||||
|
||||
## Standalone Real-time nodes
|
||||
|
||||
Standalone real-time nodes can be updated one at a time in a rolling fashion.
|
||||
|
||||
## Indexing Service
|
||||
|
||||
### With Autoscaling
|
||||
|
||||
Overlord nodes will try to launch new middle manager nodes and terminate old ones without dropping data. This process is based on the configuration `druid.indexer.runner.minWorkerVersion=#{VERSION}`. Each time you update your overlord node, the `VERSION` value should be increased.
|
||||
|
||||
The config `druid.indexer.autoscale.workerVersion=#{VERSION}` also needs to be set.
|
||||
|
||||
### Without Autoscaling
|
||||
|
||||
Middle managers can be updated in a rolling fashion based on API.
|
||||
|
||||
To prepare a middle manager for update, send a POST request to `<MiddleManager_IP:PORT>/druid/worker/v1/disable`. The overlord will now no longer send tasks to this middle manager.
|
||||
|
||||
Current tasks will still try to complete. To view all existing tasks, send a GET request to `<MiddleManager_IP:PORT>/druid/worker/v1/tasks`. When this list is empty, the middle manager can be updated. After the middle manager is updated, it is automatically enabled again. You can also manually enable middle managers by POSTing to `<MiddleManager_IP:PORT>/druid/worker/v1/enable`.
|
||||
|
||||
## Broker Nodes
|
||||
|
||||
Broker nodes can be updated one at a time in a rolling fashion. There needs to be some delay between updating each node as brokers must load the entire state of the cluster before they return valid results.
|
||||
|
||||
## Coordinator Nodes
|
||||
|
||||
Coordinator nodes can be updated in a rolling fashion.
|
|
@ -28,7 +28,7 @@ Configuration:
|
|||
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
|
@ -37,6 +37,7 @@ Configuration:
|
|||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
-Ddruid.indexer.runner.startPort=8088
|
||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
|
||||
```
|
||||
|
|
|
@ -77,7 +77,7 @@ The Hadoop Index Task is used to index larger data sets that require the paralle
|
|||
|hadoopCoordinates|The Maven \<groupId\>:\<artifactId\>:\<version\> of Hadoop to use. The default is "org.apache.hadoop:hadoop-client:2.3.0".|no|
|
||||
|
||||
|
||||
The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `metadataUpdateSpec`. The Indexing Service takes care of setting these fields internally.
|
||||
The Hadoop Index Config submitted as part of a Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally.
|
||||
|
||||
#### Using your own Hadoop distribution
|
||||
|
||||
|
@ -220,7 +220,7 @@ Kill tasks delete all information about a segment and removes it from deep stora
|
|||
"type": "kill",
|
||||
"id": <task_id>,
|
||||
"dataSource": <task_datasource>,
|
||||
"segments": <JSON list of DataSegment objects to append>
|
||||
"interval" : <all_segments_in_this_interval_will_die!>
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
@ -7,7 +7,8 @@ Time boundary queries return the earliest and latest data points of a data set.
|
|||
```json
|
||||
{
|
||||
"queryType" : "timeBoundary",
|
||||
"dataSource": "sample_datasource"
|
||||
"dataSource": "sample_datasource",
|
||||
"bound" : < "maxTime" | "minTime" > # optional, defaults to returning both timestamps if not set
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -17,6 +18,7 @@ There are 3 main parts to a time boundary query:
|
|||
|--------|-----------|---------|
|
||||
|queryType|This String should always be "timeBoundary"; this is the first thing Druid looks at to figure out how to interpret the query|yes|
|
||||
|dataSource|A String defining the data source to query, very similar to a table in a relational database|yes|
|
||||
|bound | Optional, set to `maxTime` or `minTime` to return only the latest or earliest timestamp. Defaults to returning both if not set| no |
|
||||
|context|An additional JSON Object which can be used to specify certain flags.|no|
|
||||
|
||||
The format of the result is:
|
||||
|
|
|
@ -60,3 +60,19 @@ See [http://www.davekoelle.com/alphanum.html](http://www.davekoelle.com/alphanum
|
|||
|--------|-----------|---------|
|
||||
|type|this indicates an alpha-numeric sort|yes|
|
||||
|previousStop|the starting point of the alpha-numeric sort. For example, if a previousStop value is 'b', all values before 'b' are discarded. This field can be used to paginate through all the dimension values.|no|
|
||||
|
||||
## Inverted TopNMetricSpec
|
||||
|
||||
Sort dimension values in inverted order, i.e. inverts the order of the delegate metric spec. It can be used to sort the values in ascending order.
|
||||
|
||||
```json
|
||||
"metric": {
|
||||
"type": "inverted",
|
||||
"metric": <delegate_top_n_metric_spec>
|
||||
}
|
||||
```
|
||||
|
||||
|property|description|required?|
|
||||
|--------|-----------|---------|
|
||||
|type|this indicates an inverted sort|yes|
|
||||
|metric|the delegate metric spec. |yes|
|
||||
|
|
|
@ -49,7 +49,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
|||
|
||||
### Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.121-bin.tar.gz). Download this file to a directory of your choosing.
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.138-bin.tar.gz). Download this file to a directory of your choosing.
|
||||
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
||||
|
@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
|
|||
Not too lost so far, right? That's great! If you cd into the directory:
|
||||
|
||||
```
|
||||
cd druid-services-0.6.121
|
||||
cd druid-services-0.6.138
|
||||
```
|
||||
|
||||
You should see a bunch of files:
|
||||
|
@ -69,6 +69,20 @@ You should see a bunch of files:
|
|||
* run_example_client.sh
|
||||
* LICENSE, config, examples, lib directories
|
||||
|
||||
Setting up Zookeeper
|
||||
--------------------
|
||||
|
||||
Before we get started, we need to start Apache Zookeeper.
|
||||
|
||||
```bash
|
||||
curl http://apache.osuosl.org/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
|
||||
tar xzf zookeeper-3.4.5.tar.gz
|
||||
cd zookeeper-3.4.5
|
||||
cp conf/zoo_sample.cfg conf/zoo.cfg
|
||||
./bin/zkServer.sh start
|
||||
cd ..
|
||||
```
|
||||
|
||||
Running Example Scripts
|
||||
-----------------------
|
||||
|
||||
|
@ -275,7 +289,7 @@ and put the following in there:
|
|||
"dataSource": "wikipedia",
|
||||
"granularity": "all",
|
||||
"dimensions": [ "page" ],
|
||||
"orderBy": {
|
||||
"limitSpec": {
|
||||
"type": "default",
|
||||
"columns": [ { "dimension": "edit_count", "direction": "DESCENDING" } ],
|
||||
"limit": 10
|
||||
|
@ -288,7 +302,7 @@ and put the following in there:
|
|||
}
|
||||
```
|
||||
|
||||
Woah! Our query just got a way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
Woah! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [LimitSpec](LimitSpec.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
|
||||
If you issue the query:
|
||||
|
||||
|
|
|
@ -85,28 +85,23 @@ config/overlord/runtime.properties
|
|||
The configurations for the overlord node are as follows:
|
||||
|
||||
```bash
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
druid.host=localhost
|
||||
druid.port=8087
|
||||
druid.service=overlord
|
||||
|
||||
-Ddruid.host=localhost
|
||||
-Ddruid.port=8080
|
||||
-Ddruid.service=overlord
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
-Ddruid.zk.service.host=localhost
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
-Ddruid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
|
||||
druid.selectors.indexing.serviceName=overlord
|
||||
druid.indexer.queue.startDelay=PT0M
|
||||
druid.indexer.runner.javaOpts="-server -Xmx256m"
|
||||
druid.indexer.fork.property.druid.processing.numThreads=1
|
||||
druid.indexer.fork.property.druid.computation.buffer.size=100000000
|
||||
```
|
||||
|
||||
If you are interested in reading more about these configurations, see [here](Indexing-Service.html).
|
||||
|
|
|
@ -109,22 +109,27 @@ You should be comfortable starting Druid nodes at this point. If not, it may be
|
|||
{
|
||||
"schema": {
|
||||
"dataSource": "wikipedia",
|
||||
"aggregators" : [{
|
||||
"type" : "count",
|
||||
"name" : "count"
|
||||
}, {
|
||||
"type" : "doubleSum",
|
||||
"name" : "added",
|
||||
"fieldName" : "added"
|
||||
}, {
|
||||
"type" : "doubleSum",
|
||||
"name" : "deleted",
|
||||
"fieldName" : "deleted"
|
||||
}, {
|
||||
"type" : "doubleSum",
|
||||
"name" : "delta",
|
||||
"fieldName" : "delta"
|
||||
}],
|
||||
"aggregators" : [
|
||||
{
|
||||
"type" : "count",
|
||||
"name" : "count"
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "added",
|
||||
"fieldName" : "added"
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "deleted",
|
||||
"fieldName" : "deleted"
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "delta",
|
||||
"fieldName" : "delta"
|
||||
}
|
||||
],
|
||||
"indexGranularity": "none"
|
||||
},
|
||||
"config": {
|
||||
|
@ -196,13 +201,15 @@ Note: This config uses a "test" [rejection policy](Plumber.html) which will acce
|
|||
Issuing a [TimeBoundaryQuery](TimeBoundaryQuery.html) to the real-time node should yield valid results:
|
||||
|
||||
```json
|
||||
[ {
|
||||
"timestamp" : "2013-08-31T01:02:33.000Z",
|
||||
"result" : {
|
||||
"minTime" : "2013-08-31T01:02:33.000Z",
|
||||
"maxTime" : "2013-08-31T12:41:27.000Z"
|
||||
[
|
||||
{
|
||||
"timestamp" : "2013-08-31T01:02:33.000Z",
|
||||
"result" : {
|
||||
"minTime" : "2013-08-31T01:02:33.000Z",
|
||||
"maxTime" : "2013-08-31T12:41:27.000Z"
|
||||
}
|
||||
}
|
||||
} ]
|
||||
]
|
||||
```
|
||||
|
||||
Batch Ingestion
|
||||
|
@ -287,22 +294,27 @@ Examining the contents of the file, you should find:
|
|||
},
|
||||
"targetPartitionSize" : 5000000,
|
||||
"rollupSpec" : {
|
||||
"aggs": [{
|
||||
"aggs": [
|
||||
{
|
||||
"type" : "count",
|
||||
"name" : "count"
|
||||
}, {
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "added",
|
||||
"fieldName" : "added"
|
||||
}, {
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "deleted",
|
||||
"fieldName" : "deleted"
|
||||
}, {
|
||||
},
|
||||
{
|
||||
"type" : "doubleSum",
|
||||
"name" : "delta",
|
||||
"fieldName" : "delta"
|
||||
}],
|
||||
}
|
||||
],
|
||||
"rollupGranularity" : "none"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes and external depende
|
|||
|
||||
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
|
||||
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.121-bin.tar.gz)
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.138-bin.tar.gz)
|
||||
|
||||
and untar the contents within by issuing:
|
||||
|
||||
|
@ -48,7 +48,7 @@ CREATE database druid;
|
|||
#### Setting up Zookeeper
|
||||
|
||||
```bash
|
||||
curl http://www.motorlogy.com/apache/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
|
||||
curl http://apache.osuosl.org/zookeeper/zookeeper-3.4.5/zookeeper-3.4.5.tar.gz -o zookeeper-3.4.5.tar.gz
|
||||
tar xzf zookeeper-3.4.5.tar.gz
|
||||
cd zookeeper-3.4.5
|
||||
cp conf/zoo_sample.cfg conf/zoo.cfg
|
||||
|
@ -120,7 +120,7 @@ druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
|
|||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
|
||||
druid.coordinator.startDelay=PT60s
|
||||
druid.coordinator.startDelay=PT70s
|
||||
```
|
||||
|
||||
To start the coordinator node:
|
||||
|
@ -149,7 +149,7 @@ druid.port=8081
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138"]
|
||||
|
||||
# Dummy read only AWS account (used to download example data)
|
||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||
|
@ -240,7 +240,7 @@ druid.port=8083
|
|||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.121","io.druid.extensions:druid-kafka-seven:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.138","io.druid.extensions:druid-kafka-seven:0.6.138"]
|
||||
|
||||
# Change this config to db to hand off to the rest of the Druid cluster
|
||||
druid.publish.type=noop
|
||||
|
|
|
@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu
|
|||
|
||||
h3. Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.121-bin.tar.gz)
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.138-bin.tar.gz)
|
||||
Download this file to a directory of your choosing.
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
||||
|
@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz
|
|||
Not too lost so far, right? That's great! If you cd into the directory:
|
||||
|
||||
```
|
||||
cd druid-services-0.6.121
|
||||
cd druid-services-0.6.138
|
||||
```
|
||||
|
||||
You should see a bunch of files:
|
||||
|
@ -251,7 +251,7 @@ and put the following in there:
|
|||
"dataSource": "webstream",
|
||||
"granularity": "all",
|
||||
"dimensions": [ "geo_region" ],
|
||||
"orderBy": {
|
||||
"limitSpec": {
|
||||
"type": "default",
|
||||
"columns": [
|
||||
{ "dimension": "known_users", "direction": "DESCENDING" }
|
||||
|
@ -267,7 +267,7 @@ and put the following in there:
|
|||
}
|
||||
```
|
||||
|
||||
Woah! Our query just got a way more complicated. Now we have these [Filters](Filters.html) things and this [OrderBy](OrderBy.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
Woah! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [LimitSpec](LimitSpec.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
|
||||
If you issue the query:
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source.
|
|||
|
||||
# Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.121-bin.tar.gz).
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.138-bin.tar.gz).
|
||||
Download this bad boy to a directory of your choosing.
|
||||
|
||||
You can extract the awesomeness within by issuing:
|
||||
|
@ -306,7 +306,7 @@ and put the following in there:
|
|||
"dataSource": "twitterstream",
|
||||
"granularity": "all",
|
||||
"dimensions": ["htags"],
|
||||
"orderBy": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
|
||||
"limitSpec": {"type":"default", "columns":[{"dimension": "tweets", "direction":"DESCENDING"}], "limit":5},
|
||||
"aggregations":[
|
||||
{ "type": "longSum", "fieldName": "tweets", "name": "tweets"}
|
||||
],
|
||||
|
@ -315,7 +315,7 @@ and put the following in there:
|
|||
}
|
||||
```
|
||||
|
||||
Woah! Our query just got a way more complicated. Now we have these "Filters":Filters.html things and this "OrderBy":OrderBy.html thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
Woah! Our query just got way more complicated. Now we have these [Filters](Filters.html) things and this [LimitSpec](LimitSpec.html) thing. Fear not, it turns out the new objects we've introduced to our query can help define the format of our results and provide an answer to our question.
|
||||
|
||||
If you issue the query:
|
||||
|
||||
|
|
|
@ -1,60 +0,0 @@
|
|||
.doc-content pre, .doc-content pre code {
|
||||
overflow: auto;
|
||||
white-space: pre;
|
||||
word-wrap: normal;
|
||||
}
|
||||
|
||||
.doc-content p {
|
||||
margin: 18px 0 18px 0;
|
||||
}
|
||||
|
||||
/*** HACK: This is a horrible hack, but I have no clue why images don't want to stay in the container **/
|
||||
.doc-content img {
|
||||
max-width: 847.5px;
|
||||
}
|
||||
|
||||
.doc-content code {
|
||||
background-color: #e0e0e0;
|
||||
}
|
||||
|
||||
.doc-content pre code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.doc-content table,
|
||||
.doc-content pre {
|
||||
margin: 35px 0 35px 0;
|
||||
}
|
||||
|
||||
.doc-content table,
|
||||
.doc-content table > thead > tr > th,
|
||||
.doc-content table > tbody > tr > th,
|
||||
.doc-content table > tfoot > tr > th,
|
||||
.doc-content table > thead > tr > td,
|
||||
.doc-content table > tbody > tr > td,
|
||||
.doc-content table > tfoot > tr > td {
|
||||
border: 1px solid #dddddd;
|
||||
}
|
||||
|
||||
.doc-content table > thead > tr > th,
|
||||
.doc-content table > thead > tr > td {
|
||||
border-bottom-width: 2px;
|
||||
}
|
||||
|
||||
.doc-content table > tbody > tr:nth-child(odd) > td,
|
||||
.doc-content table > tbody > tr:nth-child(odd) > th {
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
.doc-content table > tbody > tr:hover > td,
|
||||
.doc-content table > tbody > tr:hover > th {
|
||||
background-color: #d5d5d5;
|
||||
}
|
||||
|
||||
.doc-content table code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
td, th {
|
||||
padding: 5px;
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
.toc ul {
|
||||
list-style: none;
|
||||
list-style-position: inside;
|
||||
padding-left: 15px;
|
||||
}
|
||||
|
|
@ -17,6 +17,7 @@ h2. Getting Started
|
|||
h2. Booting a Druid Cluster
|
||||
* "Simple Cluster Configuration":Simple-Cluster-Configuration.html
|
||||
* "Production Cluster Configuration":Production-Cluster-Configuration.html
|
||||
* "Rolling Cluster Updates":Rolling-Updates.html
|
||||
|
||||
h2. Configuration
|
||||
* "Common Configuration":Configuration.html
|
||||
|
@ -25,19 +26,22 @@ h2. Configuration
|
|||
* "Historical":Historical-Config.html
|
||||
* "Broker":Broker-Config.html
|
||||
* "Realtime":Realtime-Config.html
|
||||
* "Configuring Logging":./Logging.html
|
||||
|
||||
h2. Data Ingestion
|
||||
* "Ingestion FAQ":./Ingestion-FAQ.html
|
||||
* "Realtime":./Realtime-ingestion.html
|
||||
** "Kafka-0.8.x Ingestion":./Kafka-Eight.html
|
||||
* "Batch":./Batch-ingestion.html
|
||||
** "Different Hadoop Versions":./Other-Hadoop.html
|
||||
* "Indexing Service":./Indexing-Service.html
|
||||
** "Tasks":./Tasks.html
|
||||
* "Data Formats":./Data_formats.html
|
||||
* "Ingestion FAQ":./Ingestion-FAQ.html
|
||||
|
||||
h2. Operations
|
||||
* "Performance FAQ":./Performance-FAQ.html
|
||||
* "Extending Druid":./Modules.html
|
||||
* "Booting a Production Cluster":./Booting-a-production-cluster.html
|
||||
* "Performance FAQ":./Performance-FAQ.html
|
||||
|
||||
h2. Querying
|
||||
* "Querying":./Querying.html
|
||||
|
@ -48,7 +52,7 @@ h2. Querying
|
|||
** "DimensionSpecs":./DimensionSpecs.html
|
||||
* Query Types
|
||||
** "GroupBy":./GroupByQuery.html
|
||||
*** "OrderBy":./OrderBy.html
|
||||
*** "LimitSpec":./LimitSpec.html
|
||||
*** "Having":./Having.html
|
||||
** "Search":./SearchQuery.html
|
||||
*** "SearchQuerySpec":./SearchQuerySpec.html
|
||||
|
@ -72,13 +76,14 @@ h2. Architecture
|
|||
*** "Peon":./Peons.html
|
||||
* External Dependencies
|
||||
** "Deep Storage":./Deep-Storage.html
|
||||
** "MySQL":./MySQL.html
|
||||
** "Metadata Storage":./MySQL.html
|
||||
** "ZooKeeper":./ZooKeeper.html
|
||||
|
||||
h2. Experimental
|
||||
* "About Experimental Features":./About-Experimental-Features.html
|
||||
* "Geographic Queries":./GeographicQueries.html
|
||||
* "Select Query":./SelectQuery.html
|
||||
* "Approximate Histograms and Quantiles":./ApproxHisto.html
|
||||
|
||||
h2. Development
|
||||
* "Versioning":./Versioning.html
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
.blog-listing {
|
||||
margin-bottom: 70px;
|
||||
}
|
||||
|
||||
.blog-entry {
|
||||
margin-bottom: 70px;
|
||||
}
|
||||
|
||||
.recents ul li {
|
||||
font-weight: 400;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
.sub-text {
|
||||
margin-top: 20px;
|
||||
margin-bottom: 50px;
|
||||
}
|
||||
|
||||
.main-marketing {
|
||||
margin-bottom: 50px;
|
||||
}
|
||||
|
||||
.main-marketing a {
|
||||
color: #000000;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-weight: 400;
|
||||
font-size: 30px;
|
||||
}
|
||||
|
||||
.main-marketing img {
|
||||
margin-bottom: 40px;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
# Extensions
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.121","io.druid.extensions:druid-kafka-seven:0.6.121","io.druid.extensions:druid-rabbitmq:0.6.121", "io.druid.extensions:druid-s3-extensions:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.138","io.druid.extensions:druid-kafka-seven:0.6.138","io.druid.extensions:druid-rabbitmq:0.6.138", "io.druid.extensions:druid-s3-extensions:0.6.138"]
|
||||
|
||||
# Zookeeper
|
||||
druid.zk.service.host=localhost
|
||||
|
@ -19,5 +19,5 @@ druid.selectors.indexing.serviceName=overlord
|
|||
# Monitoring (disabled for examples)
|
||||
# druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Metrics logging
|
||||
# Metrics logging (disabled for examples)
|
||||
druid.emitter=noop
|
|
@ -2,7 +2,7 @@ druid.host=localhost
|
|||
druid.service=historical
|
||||
druid.port=8081
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.121"]
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.138"]
|
||||
|
||||
# Dummy read only AWS account (used to download example data)
|
||||
druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.storage.hdfs;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import io.druid.segment.loading.DataSegmentKiller;
|
||||
import io.druid.segment.loading.SegmentLoadingException;
|
||||
import io.druid.timeline.DataSegment;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class HdfsDataSegmentKiller implements DataSegmentKiller
|
||||
{
|
||||
private final Configuration config;
|
||||
|
||||
@Inject
|
||||
public HdfsDataSegmentKiller(final Configuration config)
|
||||
{
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void kill(DataSegment segment) throws SegmentLoadingException
|
||||
{
|
||||
final Path path = getPath(segment);
|
||||
final FileSystem fs = checkPathAndGetFilesystem(path);
|
||||
try {
|
||||
if (path.getName().endsWith(".zip")) {
|
||||
// delete the parent directory containing the zip file and the descriptor
|
||||
fs.delete(path.getParent(), true);
|
||||
} else {
|
||||
throw new SegmentLoadingException("Unknown file type[%s]", path);
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new SegmentLoadingException(e, "Unable to kill segment");
|
||||
}
|
||||
}
|
||||
|
||||
private Path getPath(DataSegment segment)
|
||||
{
|
||||
return new Path(String.valueOf(segment.getLoadSpec().get("path")));
|
||||
}
|
||||
|
||||
private FileSystem checkPathAndGetFilesystem(Path path) throws SegmentLoadingException
|
||||
{
|
||||
FileSystem fs;
|
||||
try {
|
||||
fs = path.getFileSystem(config);
|
||||
|
||||
if (!fs.exists(path)) {
|
||||
throw new SegmentLoadingException("Path[%s] doesn't exist.", path);
|
||||
}
|
||||
|
||||
return fs;
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", path);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -21,6 +21,7 @@ package io.druid.storage.hdfs;
|
|||
|
||||
import com.google.common.io.Closeables;
|
||||
import com.google.inject.Inject;
|
||||
import com.metamx.common.guava.CloseQuietly;
|
||||
import io.druid.segment.loading.DataSegmentPuller;
|
||||
import io.druid.segment.loading.SegmentLoadingException;
|
||||
import io.druid.timeline.DataSegment;
|
||||
|
@ -52,22 +53,17 @@ public class HdfsDataSegmentPuller implements DataSegmentPuller
|
|||
|
||||
final FileSystem fs = checkPathAndGetFilesystem(path);
|
||||
|
||||
FSDataInputStream in = null;
|
||||
try {
|
||||
if (path.getName().endsWith(".zip")) {
|
||||
in = fs.open(path);
|
||||
CompressionUtils.unzip(in, dir);
|
||||
in.close();
|
||||
if (path.getName().endsWith(".zip")) {
|
||||
try {
|
||||
try (FSDataInputStream in = fs.open(path)) {
|
||||
CompressionUtils.unzip(in, dir);
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw new SegmentLoadingException("Unknown file type[%s]", path);
|
||||
catch (IOException e) {
|
||||
throw new SegmentLoadingException(e, "Some IOException");
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new SegmentLoadingException(e, "Some IOException");
|
||||
}
|
||||
finally {
|
||||
Closeables.closeQuietly(in);
|
||||
} else {
|
||||
throw new SegmentLoadingException("Unknown file type[%s]", path);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,7 +81,8 @@ public class HdfsDataSegmentPuller implements DataSegmentPuller
|
|||
}
|
||||
}
|
||||
|
||||
private Path getPath(DataSegment segment) {
|
||||
private Path getPath(DataSegment segment)
|
||||
{
|
||||
return new Path(String.valueOf(segment.getLoadSpec().get("path")));
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import com.google.common.io.ByteStreams;
|
|||
import com.google.common.io.Closeables;
|
||||
import com.google.common.io.OutputSupplier;
|
||||
import com.google.inject.Inject;
|
||||
import com.metamx.common.guava.CloseQuietly;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.segment.SegmentUtils;
|
||||
import io.druid.segment.loading.DataSegmentPusher;
|
||||
|
@ -78,17 +79,10 @@ public class HdfsDataSegmentPusher implements DataSegmentPusher
|
|||
|
||||
fs.mkdirs(outFile.getParent());
|
||||
log.info("Compressing files from[%s] to [%s]", inDir, outFile);
|
||||
FSDataOutputStream out = null;
|
||||
|
||||
long size;
|
||||
try {
|
||||
out = fs.create(outFile);
|
||||
|
||||
try (FSDataOutputStream out = fs.create(outFile)) {
|
||||
size = CompressionUtils.zip(inDir, out);
|
||||
|
||||
out.close();
|
||||
}
|
||||
finally {
|
||||
Closeables.closeQuietly(out);
|
||||
}
|
||||
|
||||
return createDescriptorFile(
|
||||
|
|
|
@ -27,6 +27,8 @@ import io.druid.guice.Binders;
|
|||
import io.druid.guice.JsonConfigProvider;
|
||||
import io.druid.guice.LazySingleton;
|
||||
import io.druid.initialization.DruidModule;
|
||||
import io.druid.storage.hdfs.tasklog.HdfsTaskLogs;
|
||||
import io.druid.storage.hdfs.tasklog.HdfsTaskLogsConfig;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -55,6 +57,7 @@ public class HdfsStorageDruidModule implements DruidModule
|
|||
{
|
||||
Binders.dataSegmentPullerBinder(binder).addBinding("hdfs").to(HdfsDataSegmentPuller.class).in(LazySingleton.class);
|
||||
Binders.dataSegmentPusherBinder(binder).addBinding("hdfs").to(HdfsDataSegmentPusher.class).in(LazySingleton.class);
|
||||
Binders.dataSegmentKillerBinder(binder).addBinding("hdfs").to(HdfsDataSegmentKiller.class).in(LazySingleton.class);
|
||||
|
||||
final Configuration conf = new Configuration();
|
||||
if (props != null) {
|
||||
|
@ -67,5 +70,9 @@ public class HdfsStorageDruidModule implements DruidModule
|
|||
|
||||
binder.bind(Configuration.class).toInstance(conf);
|
||||
JsonConfigProvider.bind(binder, "druid.storage", HdfsDataSegmentPusherConfig.class);
|
||||
|
||||
Binders.taskLogsBinder(binder).addBinding("hdfs").to(HdfsTaskLogs.class);
|
||||
JsonConfigProvider.bind(binder, "druid.indexer.logs", HdfsTaskLogsConfig.class);
|
||||
binder.bind(HdfsTaskLogs.class).in(LazySingleton.class);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
package io.druid.storage.hdfs.tasklog;
|
||||
|
||||
import com.google.common.base.Optional;
|
||||
import com.google.common.io.ByteStreams;
|
||||
import com.google.common.io.InputSupplier;
|
||||
import com.google.inject.Inject;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.tasklogs.TaskLogs;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Indexer hdfs task logs, to support storing hdfs tasks to hdfs
|
||||
*
|
||||
* Created by Frank Ren on 6/20/14.
|
||||
*/
|
||||
public class HdfsTaskLogs implements TaskLogs
|
||||
{
|
||||
private static final Logger log = new Logger(HdfsTaskLogs.class);
|
||||
|
||||
private final HdfsTaskLogsConfig config;
|
||||
|
||||
@Inject
|
||||
public HdfsTaskLogs(HdfsTaskLogsConfig config)
|
||||
{
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void pushTaskLog(String taskId, File logFile) throws IOException
|
||||
{
|
||||
final Path path = getTaskLogFileFromId(taskId);
|
||||
log.info("writing task log to: %s", path);
|
||||
Configuration conf = new Configuration();
|
||||
final FileSystem fs = FileSystem.get(conf);
|
||||
FileUtil.copy(logFile, fs, path, false, conf);
|
||||
log.info("wrote task log to: %s", path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<InputSupplier<InputStream>> streamTaskLog(final String taskId, final long offset) throws IOException
|
||||
{
|
||||
final Path path = getTaskLogFileFromId(taskId);
|
||||
final FileSystem fs = FileSystem.get(new Configuration());
|
||||
if (fs.exists(path)) {
|
||||
return Optional.<InputSupplier<InputStream>>of(
|
||||
new InputSupplier<InputStream>() {
|
||||
@Override
|
||||
public InputStream getInput() throws IOException
|
||||
{
|
||||
log.info("reading task log from: %s", path);
|
||||
final InputStream inputStream = fs.open(path);
|
||||
ByteStreams.skipFully(inputStream, offset);
|
||||
log.info("read task log from: %s", path);
|
||||
return inputStream;
|
||||
}
|
||||
}
|
||||
);
|
||||
} else {
|
||||
return Optional.absent();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Due to https://issues.apache.org/jira/browse/HDFS-13 ":" are not allowed in
|
||||
* path names. So we format paths differently for HDFS.
|
||||
*/
|
||||
private Path getTaskLogFileFromId(String taskId)
|
||||
{
|
||||
return new Path(mergePaths(config.getDirectory(), taskId.replaceAll(":", "_")));
|
||||
}
|
||||
|
||||
// some hadoop version Path.mergePaths does not exist
|
||||
private static String mergePaths(String path1, String path2)
|
||||
{
|
||||
return path1 + (path1.endsWith(Path.SEPARATOR) ? "" : Path.SEPARATOR) + path2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
package io.druid.storage.hdfs.tasklog;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import javax.validation.constraints.NotNull;
|
||||
|
||||
/**
|
||||
* Indexer hdfs task logs configuration
|
||||
*
|
||||
* Created by Frank Ren on 6/20/14.
|
||||
*/
|
||||
public class HdfsTaskLogsConfig
|
||||
{
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
private String directory;
|
||||
|
||||
public String getDirectory()
|
||||
{
|
||||
return directory;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Druid - a distributed column store.
|
||||
~ Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
~
|
||||
~ This program is free software; you can redistribute it and/or
|
||||
~ modify it under the terms of the GNU General Public License
|
||||
~ as published by the Free Software Foundation; either version 2
|
||||
~ of the License, or (at your option) any later version.
|
||||
~
|
||||
~ This program is distributed in the hope that it will be useful,
|
||||
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
~ GNU General Public License for more details.
|
||||
~
|
||||
~ You should have received a copy of the GNU General Public License
|
||||
~ along with this program; if not, write to the Free Software
|
||||
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>io.druid.extensions</groupId>
|
||||
<artifactId>druid-histogram</artifactId>
|
||||
<name>druid-histogram</name>
|
||||
<description>druid-histogram</description>
|
||||
|
||||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid-processing</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Tests -->
|
||||
<dependency>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid-processing</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
|
||||
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.primitives.Longs;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.segment.FloatColumnSelector;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class ApproximateHistogramAggregator implements Aggregator
|
||||
{
|
||||
public static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Longs.compare(((ApproximateHistogram) o).count(), ((ApproximateHistogram) o1).count());
|
||||
}
|
||||
};
|
||||
|
||||
static Object combineHistograms(Object lhs, Object rhs)
|
||||
{
|
||||
return ((ApproximateHistogram) lhs).foldFast((ApproximateHistogram) rhs);
|
||||
}
|
||||
|
||||
private final String name;
|
||||
private final FloatColumnSelector selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
private ApproximateHistogram histogram;
|
||||
|
||||
public ApproximateHistogramAggregator(
|
||||
String name,
|
||||
FloatColumnSelector selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate()
|
||||
{
|
||||
histogram.offer(selector.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get()
|
||||
{
|
||||
return histogram;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat()
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.primitives.Floats;
|
||||
import com.google.common.primitives.Ints;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ColumnSelectorFactory;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@JsonTypeName("approxHistogram")
|
||||
public class ApproximateHistogramAggregatorFactory implements AggregatorFactory
|
||||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x8;
|
||||
|
||||
protected final String name;
|
||||
protected final String fieldName;
|
||||
|
||||
protected final int resolution;
|
||||
protected final int numBuckets;
|
||||
|
||||
protected final float lowerLimit;
|
||||
protected final float upperLimit;
|
||||
|
||||
@JsonCreator
|
||||
public ApproximateHistogramAggregatorFactory(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("resolution") Integer resolution,
|
||||
@JsonProperty("numBuckets") Integer numBuckets,
|
||||
@JsonProperty("lowerLimit") Float lowerLimit,
|
||||
@JsonProperty("upperLimit") Float upperLimit
|
||||
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.fieldName = fieldName.toLowerCase();
|
||||
this.resolution = resolution == null ? ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE : resolution;
|
||||
this.numBuckets = numBuckets == null ? ApproximateHistogram.DEFAULT_BUCKET_SIZE : numBuckets;
|
||||
this.lowerLimit = lowerLimit == null ? Float.NEGATIVE_INFINITY : lowerLimit;
|
||||
this.upperLimit = upperLimit == null ? Float.POSITIVE_INFINITY : upperLimit;
|
||||
|
||||
Preconditions.checkArgument(this.resolution > 0, "resolution must be greater than 1");
|
||||
Preconditions.checkArgument(this.numBuckets > 0, "numBuckets must be greater than 1");
|
||||
Preconditions.checkArgument(this.upperLimit > this.lowerLimit, "upperLimit must be greater than lowerLimit");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Aggregator factorize(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
return new ApproximateHistogramAggregator(
|
||||
name,
|
||||
metricFactory.makeFloatColumnSelector(fieldName),
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
return new ApproximateHistogramBufferAggregator(
|
||||
metricFactory.makeFloatColumnSelector(fieldName),
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return ApproximateHistogramAggregator.COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object combine(Object lhs, Object rhs)
|
||||
{
|
||||
return ApproximateHistogramAggregator.combineHistograms(lhs, rhs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getCombiningFactory()
|
||||
{
|
||||
return new ApproximateHistogramAggregatorFactory(name, name, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
return Arrays.<AggregatorFactory>asList(
|
||||
new ApproximateHistogramAggregatorFactory(
|
||||
fieldName,
|
||||
fieldName,
|
||||
resolution,
|
||||
numBuckets,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object deserialize(Object object)
|
||||
{
|
||||
if (object instanceof byte[]) {
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes((byte[]) object);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else if (object instanceof ByteBuffer) {
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes((ByteBuffer) object);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else if (object instanceof String) {
|
||||
byte[] bytes = Base64.decodeBase64(((String) object).getBytes(Charsets.UTF_8));
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes(bytes);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else {
|
||||
return object;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object finalizeComputation(Object object)
|
||||
{
|
||||
return ((ApproximateHistogram) object).toHistogram(numBuckets);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getFieldName()
|
||||
{
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getResolution()
|
||||
{
|
||||
return resolution;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getLowerLimit()
|
||||
{
|
||||
return lowerLimit;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getUpperLimit()
|
||||
{
|
||||
return upperLimit;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getNumBuckets()
|
||||
{
|
||||
return numBuckets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> requiredFields()
|
||||
{
|
||||
return Arrays.asList(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
byte[] fieldNameBytes = fieldName.getBytes(Charsets.UTF_8);
|
||||
return ByteBuffer.allocate(1 + fieldNameBytes.length + Ints.BYTES * 2 + Floats.BYTES * 2)
|
||||
.put(CACHE_TYPE_ID)
|
||||
.put(fieldNameBytes)
|
||||
.putInt(resolution)
|
||||
.putInt(numBuckets)
|
||||
.putFloat(lowerLimit)
|
||||
.putFloat(upperLimit).array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTypeName()
|
||||
{
|
||||
return "approximateHistogram";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxIntermediateSize()
|
||||
{
|
||||
return new ApproximateHistogram(resolution).getMaxStorageSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getAggregatorStartValue()
|
||||
{
|
||||
return new ApproximateHistogram(resolution);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "ApproximateHistogramAggregatorFactory{" +
|
||||
"name='" + name + '\'' +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
", resolution=" + resolution +
|
||||
", numBuckets=" + numBuckets +
|
||||
", lowerLimit=" + lowerLimit +
|
||||
", upperLimit=" + upperLimit +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.FloatColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramBufferAggregator implements BufferAggregator
|
||||
{
|
||||
private final FloatColumnSelector selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
public ApproximateHistogramBufferAggregator(FloatColumnSelector selector, int resolution, float lowerLimit, float upperLimit)
|
||||
{
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
mutationBuffer.putInt(resolution);
|
||||
mutationBuffer.putInt(0); //initial binCount
|
||||
for (int i = 0; i < resolution; ++i) {
|
||||
mutationBuffer.putFloat(0f);
|
||||
}
|
||||
for (int i = 0; i < resolution; ++i) {
|
||||
mutationBuffer.putLong(0L);
|
||||
}
|
||||
|
||||
// min
|
||||
mutationBuffer.putFloat(Float.POSITIVE_INFINITY);
|
||||
// max
|
||||
mutationBuffer.putFloat(Float.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
h0.offer(selector.get());
|
||||
|
||||
mutationBuffer.position(position);
|
||||
h0.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
return ApproximateHistogram.fromBytes(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat(ByteBuffer buf, int position)
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramBufferAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.databind.Module;
|
||||
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.inject.Binder;
|
||||
import io.druid.initialization.DruidModule;
|
||||
import io.druid.segment.serde.ComplexMetrics;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class ApproximateHistogramDruidModule implements DruidModule
|
||||
{
|
||||
@Override
|
||||
public List<? extends Module> getJacksonModules()
|
||||
{
|
||||
return ImmutableList.of(
|
||||
new SimpleModule().registerSubtypes(
|
||||
ApproximateHistogramFoldingAggregatorFactory.class,
|
||||
ApproximateHistogramAggregatorFactory.class,
|
||||
EqualBucketsPostAggregator.class,
|
||||
CustomBucketsPostAggregator.class,
|
||||
BucketsPostAggregator.class,
|
||||
QuantilesPostAggregator.class,
|
||||
QuantilePostAggregator.class,
|
||||
MinPostAggregator.class,
|
||||
MaxPostAggregator.class
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(Binder binder)
|
||||
{
|
||||
if (ComplexMetrics.getSerdeForType("approximateHistogram") == null) {
|
||||
ComplexMetrics.registerSerde("approximateHistogram", new ApproximateHistogramFoldingSerde());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
public class ApproximateHistogramFoldingAggregator implements Aggregator
|
||||
{
|
||||
private final String name;
|
||||
private final ObjectColumnSelector<ApproximateHistogram> selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
private ApproximateHistogram histogram;
|
||||
private float[] tmpBufferP;
|
||||
private long[] tmpBufferB;
|
||||
|
||||
public ApproximateHistogramFoldingAggregator(
|
||||
String name,
|
||||
ObjectColumnSelector<ApproximateHistogram> selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
|
||||
tmpBufferP = new float[resolution];
|
||||
tmpBufferB = new long[resolution];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate()
|
||||
{
|
||||
ApproximateHistogram h = selector.get();
|
||||
if (h == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (h.binCount() + histogram.binCount() <= tmpBufferB.length) {
|
||||
histogram.foldFast(h, tmpBufferP, tmpBufferB);
|
||||
} else {
|
||||
histogram.foldFast(h);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get()
|
||||
{
|
||||
return histogram;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat()
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.primitives.Floats;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.metamx.common.IAE;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ColumnSelectorFactory;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
@JsonTypeName("approxHistogramFold")
|
||||
public class ApproximateHistogramFoldingAggregatorFactory extends ApproximateHistogramAggregatorFactory
|
||||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x9;
|
||||
|
||||
@JsonCreator
|
||||
public ApproximateHistogramFoldingAggregatorFactory(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("resolution") Integer resolution,
|
||||
@JsonProperty("numBuckets") Integer numBuckets,
|
||||
@JsonProperty("lowerLimit") Float lowerLimit,
|
||||
@JsonProperty("upperLimit") Float upperLimit
|
||||
)
|
||||
{
|
||||
super(name, fieldName, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Aggregator factorize(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
ObjectColumnSelector selector = metricFactory.makeObjectColumnSelector(fieldName);
|
||||
|
||||
if (selector == null) {
|
||||
// gracefully handle undefined metrics
|
||||
|
||||
selector = new ObjectColumnSelector<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> classOfObject()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram get()
|
||||
{
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (ApproximateHistogram.class.isAssignableFrom(selector.classOfObject())) {
|
||||
return new ApproximateHistogramFoldingAggregator(
|
||||
name,
|
||||
selector,
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
throw new IAE(
|
||||
"Incompatible type for metric[%s], expected a ApproximateHistogram, got a %s",
|
||||
fieldName,
|
||||
selector.classOfObject()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
ObjectColumnSelector selector = metricFactory.makeObjectColumnSelector(fieldName);
|
||||
|
||||
if (selector == null) {
|
||||
// gracefully handle undefined metrics
|
||||
|
||||
selector = new ObjectColumnSelector<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> classOfObject()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram get()
|
||||
{
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (ApproximateHistogram.class.isAssignableFrom(selector.classOfObject())) {
|
||||
return new ApproximateHistogramFoldingBufferAggregator(selector, resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
throw new IAE(
|
||||
"Incompatible type for metric[%s], expected a ApproximateHistogram, got a %s",
|
||||
fieldName,
|
||||
selector.classOfObject()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getCombiningFactory()
|
||||
{
|
||||
return new ApproximateHistogramFoldingAggregatorFactory(name, name, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
byte[] fieldNameBytes = fieldName.getBytes(Charsets.UTF_8);
|
||||
return ByteBuffer.allocate(1 + fieldNameBytes.length + Ints.BYTES * 2 + Floats.BYTES * 2)
|
||||
.put(CACHE_TYPE_ID)
|
||||
.put(fieldNameBytes)
|
||||
.putInt(resolution)
|
||||
.putInt(numBuckets)
|
||||
.putFloat(lowerLimit)
|
||||
.putFloat(upperLimit)
|
||||
.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "ApproximateHistogramFoldingAggregatorFactory{" +
|
||||
"name='" + name + '\'' +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
", resolution=" + resolution +
|
||||
", numBuckets=" + numBuckets +
|
||||
", lowerLimit=" + lowerLimit +
|
||||
", upperLimit=" + upperLimit +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramFoldingBufferAggregator implements BufferAggregator
|
||||
{
|
||||
private final ObjectColumnSelector<ApproximateHistogram> selector;
|
||||
private final int resolution;
|
||||
private final float upperLimit;
|
||||
private final float lowerLimit;
|
||||
|
||||
private float[] tmpBufferP;
|
||||
private long[] tmpBufferB;
|
||||
|
||||
public ApproximateHistogramFoldingBufferAggregator(
|
||||
ObjectColumnSelector<ApproximateHistogram> selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
|
||||
tmpBufferP = new float[resolution];
|
||||
tmpBufferB = new long[resolution];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(ByteBuffer buf, int position)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
// use dense storage for aggregation
|
||||
h.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
h0.setLowerLimit(lowerLimit);
|
||||
h0.setUpperLimit(upperLimit);
|
||||
ApproximateHistogram hNext = selector.get();
|
||||
h0.foldFast(hNext, tmpBufferP, tmpBufferB);
|
||||
|
||||
mutationBuffer.position(position);
|
||||
h0.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.asReadOnlyBuffer();
|
||||
mutationBuffer.position(position);
|
||||
return ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat(ByteBuffer buf, int position)
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramFoldingBufferAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.collect.Ordering;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.segment.column.ColumnBuilder;
|
||||
import io.druid.segment.data.GenericIndexed;
|
||||
import io.druid.segment.data.ObjectStrategy;
|
||||
import io.druid.segment.serde.ColumnPartSerde;
|
||||
import io.druid.segment.serde.ComplexColumnPartSerde;
|
||||
import io.druid.segment.serde.ComplexColumnPartSupplier;
|
||||
import io.druid.segment.serde.ComplexMetricExtractor;
|
||||
import io.druid.segment.serde.ComplexMetricSerde;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
public class ApproximateHistogramFoldingSerde extends ComplexMetricSerde
|
||||
{
|
||||
private static Ordering<ApproximateHistogram> comparator = new Ordering<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public int compare(
|
||||
ApproximateHistogram arg1, ApproximateHistogram arg2
|
||||
)
|
||||
{
|
||||
return ApproximateHistogramAggregator.COMPARATOR.compare(arg1, arg2);
|
||||
}
|
||||
}.nullsFirst();
|
||||
|
||||
@Override
|
||||
public String getTypeName()
|
||||
{
|
||||
return "approximateHistogram";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ComplexMetricExtractor getExtractor()
|
||||
{
|
||||
return new ComplexMetricExtractor()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> extractedClass()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram extractValue(InputRow inputRow, String metricName)
|
||||
{
|
||||
Object rawValue = inputRow.getRaw(metricName);
|
||||
|
||||
if (rawValue instanceof ApproximateHistogram) {
|
||||
return (ApproximateHistogram) rawValue;
|
||||
} else {
|
||||
List<String> dimValues = inputRow.getDimension(metricName);
|
||||
if (dimValues != null && dimValues.size() > 0) {
|
||||
Iterator<String> values = dimValues.iterator();
|
||||
|
||||
ApproximateHistogram h = new ApproximateHistogram();
|
||||
|
||||
while (values.hasNext()) {
|
||||
float value = Float.parseFloat(values.next());
|
||||
h.offer(value);
|
||||
}
|
||||
return h;
|
||||
} else {
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnPartSerde deserializeColumn(
|
||||
ByteBuffer byteBuffer, ColumnBuilder columnBuilder
|
||||
)
|
||||
{
|
||||
final GenericIndexed column = GenericIndexed.read(byteBuffer, getObjectStrategy());
|
||||
|
||||
columnBuilder.setComplexColumn(new ComplexColumnPartSupplier(getTypeName(), column));
|
||||
|
||||
return new ComplexColumnPartSerde(column, getTypeName());
|
||||
}
|
||||
|
||||
public ObjectStrategy getObjectStrategy()
|
||||
{
|
||||
return new ObjectStrategy<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<? extends ApproximateHistogram> getClazz()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram fromByteBuffer(ByteBuffer buffer, int numBytes)
|
||||
{
|
||||
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
|
||||
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
|
||||
return ApproximateHistogram.fromBytes(readOnlyBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] toBytes(ApproximateHistogram h)
|
||||
{
|
||||
if (h == null) {
|
||||
return new byte[]{};
|
||||
}
|
||||
return h.toBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ApproximateHistogram o1, ApproximateHistogram o2)
|
||||
{
|
||||
return comparator.compare(o1, o2);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class ApproximateHistogramPostAggregator implements PostAggregator
|
||||
{
|
||||
private static final Comparator COMPARATOR = ApproximateHistogramAggregator.COMPARATOR;
|
||||
|
||||
private final String name;
|
||||
private final String fieldName;
|
||||
|
||||
public ApproximateHistogramPostAggregator(
|
||||
String name,
|
||||
String fieldName
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract Object compute(Map<String, Object> values);
|
||||
|
||||
@Override
|
||||
@JsonProperty
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getFieldName()
|
||||
{
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract String toString();
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
/**
 * Range-limited hash codes for primitive arrays. Each method combines the elements in
 * {@code [fromIndex, toIndex)} with the usual {@code 31 * h + elementHash} recurrence,
 * starting from 1.
 */
public class ArrayUtils
{
  /**
   * Hashes {@code a[fromIndex..toIndex)}; each long is folded to an int by xor-ing its halves.
   */
  public static int hashCode(long[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      final long element = a[idx];
      result = 31 * result + (int) (element ^ (element >>> 32));
    }
    return result;
  }

  /**
   * Hashes {@code a[fromIndex..toIndex)} using {@link Float#floatToIntBits(float)} per element.
   */
  public static int hashCode(float[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      result = 31 * result + Float.floatToIntBits(a[idx]);
    }
    return result;
  }

  /**
   * Hashes {@code a[fromIndex..toIndex)}; each double is converted with
   * {@link Double#doubleToLongBits(double)} and folded to an int by xor-ing its halves.
   */
  public static int hashCode(double[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      final long bits = Double.doubleToLongBits(a[idx]);
      result = 31 * result + (int) (bits ^ (bits >>> 32));
    }
    return result;
  }
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("buckets")
|
||||
public class BucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float bucketSize;
|
||||
private final float offset;
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public BucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("bucketSize") float bucketSize,
|
||||
@JsonProperty("offset") float offset
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.bucketSize = bucketSize;
|
||||
if (this.bucketSize <= 0) {
|
||||
throw new IAE("Illegal bucketSize [%s], must be > 0", this.bucketSize);
|
||||
}
|
||||
this.offset = offset;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(bucketSize, offset);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getBucketSize()
|
||||
{
|
||||
return bucketSize;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getOffset()
|
||||
{
|
||||
return bucketSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "BucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", bucketSize=" + this.getBucketSize() +
|
||||
", offset=" + this.getOffset() +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import java.nio.DoubleBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
|
||||
/**
 * Binary search helpers for sorted regions of NIO buffers.
 */
public class BufferUtils
{
  /**
   * Searches {@code buf} in the index range {@code [minIndex, maxIndex)} for {@code value},
   * comparing with {@link Double#compare(double, double)}.
   *
   * @return the index of a matching element, or {@code -(insertionPoint + 1)} when absent
   */
  public static int binarySearch(DoubleBuffer buf, int minIndex, int maxIndex, double value)
  {
    int lo = minIndex;
    int hi = maxIndex;

    while (lo < hi) {
      final int mid = (lo + hi - 1) >>> 1;
      final int cmp = Double.compare(buf.get(mid), value);

      if (cmp < 0) {
        lo = mid + 1;
      } else if (cmp > 0) {
        hi = mid;
      } else {
        return mid;
      }
    }

    return -(lo + 1);
  }

  /**
   * Searches {@code buf} in the index range {@code [minIndex, maxIndex)} for {@code value},
   * comparing with {@link Float#compare(float, float)}.
   *
   * @return the index of a matching element, or {@code -(insertionPoint + 1)} when absent
   */
  public static int binarySearch(FloatBuffer buf, int minIndex, int maxIndex, float value)
  {
    int lo = minIndex;
    int hi = maxIndex;

    while (lo < hi) {
      final int mid = (lo + hi - 1) >>> 1;
      final int cmp = Float.compare(buf.get(mid), value);

      if (cmp < 0) {
        lo = mid + 1;
      } else if (cmp > 0) {
        hi = mid;
      } else {
        return mid;
      }
    }

    return -(lo + 1);
  }
}
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("customBuckets")
|
||||
public class CustomBucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float[] breaks;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public CustomBucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("breaks") float[] breaks
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.breaks = breaks;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(breaks);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getBreaks()
|
||||
{
|
||||
return breaks;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "CustomBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", breaks=" + Arrays.toString(this.getBreaks()) +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("equalBuckets")
|
||||
public class EqualBucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final int numBuckets;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public EqualBucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("numBuckets") int numBuckets
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.numBuckets = numBuckets;
|
||||
if (this.numBuckets <= 1) {
|
||||
throw new IAE("Illegal number of buckets[%s], must be > 1", this.numBuckets);
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(numBuckets);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getNumBuckets()
|
||||
{
|
||||
return numBuckets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "EqualBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", numBuckets=" + this.getNumBuckets() +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class Histogram
|
||||
{
|
||||
double[] breaks;
|
||||
double[] counts;
|
||||
|
||||
public Histogram(float[] breaks, double[] counts)
|
||||
{
|
||||
double[] retVal = new double[breaks.length];
|
||||
for (int i = 0; i < breaks.length; ++i) {
|
||||
retVal[i] = (double) breaks[i];
|
||||
}
|
||||
|
||||
this.breaks = retVal;
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double[] getBreaks()
|
||||
{
|
||||
return breaks;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double[] getCounts()
|
||||
{
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Histogram that = (Histogram) o;
|
||||
|
||||
if (!Arrays.equals(this.getBreaks(), that.getBreaks())) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(this.getCounts(), that.getCounts())) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
int result = (this.getBreaks() != null ? ArrayUtils.hashCode(this.getBreaks(), 0, this.getBreaks().length) : 0);
|
||||
result = 31 * result + (this.getCounts() != null ? ArrayUtils.hashCode(
|
||||
this.getCounts(),
|
||||
0,
|
||||
this.getCounts().length
|
||||
) : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("max")
|
||||
public class MaxPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public MaxPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getMax();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("min")
|
||||
public class MinPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public MinPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getMin();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("quantile")
|
||||
public class QuantilePostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private final float probability;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public QuantilePostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("probability") float probability
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.probability = probability;
|
||||
this.fieldName = fieldName;
|
||||
|
||||
if (probability < 0 | probability > 1) {
|
||||
throw new IAE("Illegal probability[%s], must be strictly between 0 and 1", probability);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getQuantiles(new float[]{this.getProbability()})[0];
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getProbability()
|
||||
{
|
||||
return probability;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"probability=" + probability +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
@JsonTypeName("quantiles")
|
||||
public class Quantiles
|
||||
{
|
||||
float[] probabilities;
|
||||
float[] quantiles;
|
||||
float min;
|
||||
float max;
|
||||
|
||||
@JsonCreator
|
||||
public Quantiles(
|
||||
@JsonProperty("probabilities") float[] probabilities,
|
||||
@JsonProperty("quantiles") float[] quantiles,
|
||||
@JsonProperty("min") float min,
|
||||
@JsonProperty("max") float max
|
||||
)
|
||||
{
|
||||
this.probabilities = probabilities;
|
||||
this.quantiles = quantiles;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getProbabilities()
|
||||
{
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getQuantiles()
|
||||
{
|
||||
return quantiles;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getMin()
|
||||
{
|
||||
return min;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getMax()
|
||||
{
|
||||
return max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Quantiles quantiles1 = (Quantiles) o;
|
||||
|
||||
if (Float.compare(quantiles1.max, max) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (Float.compare(quantiles1.min, min) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(probabilities, quantiles1.probabilities)) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(quantiles, quantiles1.quantiles)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
int result = probabilities != null ? Arrays.hashCode(probabilities) : 0;
|
||||
result = 31 * result + (quantiles != null ? Arrays.hashCode(quantiles) : 0);
|
||||
result = 31 * result + (min != +0.0f ? Float.floatToIntBits(min) : 0);
|
||||
result = 31 * result + (max != +0.0f ? Float.floatToIntBits(max) : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("quantiles")
|
||||
public class QuantilesPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float[] probabilities;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public QuantilesPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("probabilities") float[] probabilities
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.probabilities = probabilities;
|
||||
this.fieldName = fieldName;
|
||||
|
||||
for (float p : probabilities) {
|
||||
if (p < 0 | p > 1) {
|
||||
throw new IAE("Illegal probability[%s], must be strictly between 0 and 1", p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
|
||||
return new Quantiles(this.getProbabilities(), ah.getQuantiles(this.getProbabilities()), ah.getMin(), ah.getMax());
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getProbabilities()
|
||||
{
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "EqualBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", probabilities=" + Arrays.toString(this.getProbabilities()) +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
io.druid.query.aggregation.histogram.ApproximateHistogramDruidModule
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.TestFloatColumnSelector;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramAggregatorTest
|
||||
{
|
||||
private void aggregateBuffer(TestFloatColumnSelector selector, BufferAggregator agg, ByteBuffer buf, int position)
|
||||
{
|
||||
agg.aggregate(buf, position);
|
||||
selector.increment();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBufferAggregate() throws Exception
|
||||
{
|
||||
final float[] values = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45};
|
||||
final int resolution = 5;
|
||||
final int numBuckets = 5;
|
||||
|
||||
final TestFloatColumnSelector selector = new TestFloatColumnSelector(values);
|
||||
|
||||
ApproximateHistogramAggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
|
||||
"billy", "billy", resolution, numBuckets, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY
|
||||
);
|
||||
ApproximateHistogramBufferAggregator agg = new ApproximateHistogramBufferAggregator(selector, resolution, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
|
||||
|
||||
ByteBuffer buf = ByteBuffer.allocate(factory.getMaxIntermediateSize());
|
||||
int position = 0;
|
||||
|
||||
agg.init(buf, position);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
aggregateBuffer(selector, agg, buf, position);
|
||||
}
|
||||
|
||||
ApproximateHistogram h = ((ApproximateHistogram) agg.get(buf, position));
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions don't match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions, 0.01f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts don't match expected counts",
|
||||
new long[]{1, 2, 3, 3, 1}, h.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("getMin value doesn't match expected getMin", 2, h.min(), 0);
|
||||
Assert.assertEquals("getMax value doesn't match expected getMax", 45, h.max(), 0);
|
||||
|
||||
Assert.assertEquals("bin count doesn't match expected bin count", 5, h.binCount());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.primitives.Floats;
|
||||
import io.druid.query.aggregation.Histogram;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class ApproximateHistogramErrorBenchmark
|
||||
{
|
||||
private boolean debug = true;
|
||||
private int numBuckets = 20;
|
||||
private int numBreaks = numBuckets + 1;
|
||||
private int numPerHist = 50;
|
||||
private int numHists = 10;
|
||||
private int resolution = 50;
|
||||
private int combinedResolution = 100;
|
||||
private Random rand = new Random(2);
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setDebug(boolean debug)
|
||||
{
|
||||
this.debug = debug;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumBuckets(int numBuckets)
|
||||
{
|
||||
this.numBuckets = numBuckets;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumBreaks(int numBreaks)
|
||||
{
|
||||
this.numBreaks = numBreaks;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumPerHist(int numPerHist)
|
||||
{
|
||||
this.numPerHist = numPerHist;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumHists(int numHists)
|
||||
{
|
||||
this.numHists = numHists;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setResolution(int resolution)
|
||||
{
|
||||
this.resolution = resolution;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setCombinedResolution(int combinedResolution)
|
||||
{
|
||||
this.combinedResolution = combinedResolution;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
ApproximateHistogramErrorBenchmark approxHist = new ApproximateHistogramErrorBenchmark();
|
||||
System.out.println(
|
||||
Arrays.toString(
|
||||
approxHist.setDebug(true)
|
||||
.setNumPerHist(50)
|
||||
.setNumHists(10000)
|
||||
.setResolution(50)
|
||||
.setCombinedResolution(100)
|
||||
.getErrors()
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
ApproximateHistogramErrorBenchmark approxHist2 = new ApproximateHistogramErrorBenchmark();
|
||||
int[] numHistsArray = new int[]{10, 100, 1000, 10000, 100000};
|
||||
float[] errs1 = new float[numHistsArray.length];
|
||||
float[] errs2 = new float[numHistsArray.length];
|
||||
for (int i = 0; i < numHistsArray.length; ++i) {
|
||||
float[] tmp = approxHist2.setDebug(false).setNumHists(numHistsArray[i]).setCombinedResolution(100).getErrors();
|
||||
errs1[i] = tmp[0];
|
||||
errs2[i] = tmp[1];
|
||||
}
|
||||
|
||||
System.out
|
||||
.format("Number of histograms for folding : %s \n", Arrays.toString(numHistsArray));
|
||||
System.out.format("Errors for approximate histogram : %s \n", Arrays.toString(errs1));
|
||||
System.out.format("Errors for approximate histogram, ruleFold : %s \n", Arrays.toString(errs2));
|
||||
}
|
||||
|
||||
private float[] getErrors()
|
||||
{
|
||||
final int numValues = numHists * numPerHist;
|
||||
final float[] values = new float[numValues];
|
||||
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
values[i] = (float) rand.nextGaussian();
|
||||
}
|
||||
|
||||
float min = Floats.min(values);
|
||||
min = (float) (min < 0 ? 1.02 : .98) * min;
|
||||
float max = Floats.max(values);
|
||||
max = (float) (max < 0 ? .98 : 1.02) * max;
|
||||
final float stride = (max - min) / numBuckets;
|
||||
final float[] breaks = new float[numBreaks];
|
||||
for (int i = 0; i < numBreaks; i++) {
|
||||
breaks[i] = min + stride * i;
|
||||
}
|
||||
|
||||
Histogram h = new Histogram(breaks);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
double[] hcounts = h.asVisual().counts;
|
||||
|
||||
ApproximateHistogram ah1 = new ApproximateHistogram(resolution);
|
||||
ApproximateHistogram ah2 = new ApproximateHistogram(combinedResolution);
|
||||
ApproximateHistogram tmp = new ApproximateHistogram(resolution);
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
tmp.offer(values[i]);
|
||||
if ((i + 1) % numPerHist == 0) {
|
||||
ah1.fold(tmp);
|
||||
ah2.foldRule(tmp, null, null);
|
||||
tmp = new ApproximateHistogram(resolution);
|
||||
}
|
||||
}
|
||||
double[] ahcounts1 = ah1.toHistogram(breaks).getCounts();
|
||||
double[] ahcounts2 = ah2.toHistogram(breaks).getCounts();
|
||||
|
||||
float err1 = 0;
|
||||
float err2 = 0;
|
||||
for (int j = 0; j < hcounts.length; j++) {
|
||||
err1 += Math.abs((hcounts[j] - ahcounts1[j]) / numValues);
|
||||
err2 += Math.abs((hcounts[j] - ahcounts2[j]) / numValues);
|
||||
}
|
||||
|
||||
if (debug) {
|
||||
float sum = 0;
|
||||
for (double v : hcounts) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Exact Histogram Sum:");
|
||||
System.out.println(sum);
|
||||
sum = 0;
|
||||
for (double v : ahcounts1) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Approximate Histogram Sum:");
|
||||
System.out.println(sum);
|
||||
sum = 0;
|
||||
for (double v : ahcounts2) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Approximate Histogram Rule Fold Sum:");
|
||||
System.out.println(sum);
|
||||
System.out.println("Exact Histogram:");
|
||||
System.out.println(h.asVisual());
|
||||
System.out.println("Approximate Histogram:");
|
||||
System.out.println(ah1.toHistogram(breaks));
|
||||
System.out.println("Approximate Histogram Rule Fold:");
|
||||
System.out.println(ah2.toHistogram(breaks));
|
||||
System.out.format("Error for approximate histogram: %s \n", err1);
|
||||
System.out.format("Error for approximate histogram, ruleFold: %s \n", err2);
|
||||
System.out.format("Error ratio for AHRF: %s \n", err2 / err1);
|
||||
}
|
||||
return new float[]{err1, err2, err2 / err1};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.TestFloatColumnSelector;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class ApproximateHistogramPostAggregatorTest
|
||||
{
|
||||
static final float[] VALUES = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompute()
|
||||
{
|
||||
ApproximateHistogram ah = buildHistogram(10, VALUES);
|
||||
final TestFloatColumnSelector selector = new TestFloatColumnSelector(VALUES);
|
||||
|
||||
ApproximateHistogramAggregator agg = new ApproximateHistogramAggregator("price", selector, 10, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
|
||||
for (int i = 0; i < VALUES.length; i++) {
|
||||
agg.aggregate();
|
||||
selector.increment();
|
||||
}
|
||||
|
||||
Map<String, Object> metricValues = new HashMap<String, Object>();
|
||||
metricValues.put(agg.getName(), agg.get());
|
||||
|
||||
ApproximateHistogramPostAggregator approximateHistogramPostAggregator = new EqualBucketsPostAggregator(
|
||||
"approxHist",
|
||||
"price",
|
||||
5
|
||||
);
|
||||
Assert.assertEquals(ah.toHistogram(5), approximateHistogramPostAggregator.compute(metricValues));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.druid.collections.StupidPool;
|
||||
import io.druid.query.QueryRunner;
|
||||
import io.druid.query.QueryRunnerTestHelper;
|
||||
import io.druid.query.Result;
|
||||
import io.druid.query.TestQueryRunners;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.MaxAggregatorFactory;
|
||||
import io.druid.query.aggregation.MinAggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
import io.druid.query.topn.TopNQuery;
|
||||
import io.druid.query.topn.TopNQueryBuilder;
|
||||
import io.druid.query.topn.TopNQueryConfig;
|
||||
import io.druid.query.topn.TopNQueryQueryToolChest;
|
||||
import io.druid.query.topn.TopNQueryRunnerFactory;
|
||||
import io.druid.query.topn.TopNResultValue;
|
||||
import io.druid.segment.TestHelper;
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class ApproximateHistogramQueryTest
|
||||
{
|
||||
|
||||
private final QueryRunner runner;
|
||||
|
||||
public ApproximateHistogramQueryTest(
|
||||
QueryRunner runner
|
||||
)
|
||||
{
|
||||
this.runner = runner;
|
||||
}
|
||||
|
||||
@Parameterized.Parameters
|
||||
public static Collection<?> constructorFeeder() throws IOException
|
||||
{
|
||||
List<Object> retVal = Lists.newArrayList();
|
||||
retVal.addAll(
|
||||
QueryRunnerTestHelper.makeQueryRunners(
|
||||
new TopNQueryRunnerFactory(
|
||||
TestQueryRunners.getPool(),
|
||||
new TopNQueryQueryToolChest(new TopNQueryConfig()),
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER
|
||||
)
|
||||
)
|
||||
);
|
||||
retVal.addAll(
|
||||
QueryRunnerTestHelper.makeQueryRunners(
|
||||
new TopNQueryRunnerFactory(
|
||||
new StupidPool<ByteBuffer>(
|
||||
new Supplier<ByteBuffer>()
|
||||
{
|
||||
@Override
|
||||
public ByteBuffer get()
|
||||
{
|
||||
return ByteBuffer.allocate(2000);
|
||||
}
|
||||
}
|
||||
),
|
||||
new TopNQueryQueryToolChest(new TopNQueryConfig()),
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTopNWithApproximateHistogramAgg()
|
||||
{
|
||||
ApproximateHistogramAggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
|
||||
"apphisto",
|
||||
"index",
|
||||
10,
|
||||
5,
|
||||
Float.NEGATIVE_INFINITY,
|
||||
Float.POSITIVE_INFINITY
|
||||
);
|
||||
|
||||
TopNQuery query = new TopNQueryBuilder()
|
||||
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||
.granularity(QueryRunnerTestHelper.allGran)
|
||||
.dimension(QueryRunnerTestHelper.providerDimension)
|
||||
.metric(QueryRunnerTestHelper.dependentPostAggMetric)
|
||||
.threshold(4)
|
||||
.intervals(QueryRunnerTestHelper.fullOnInterval)
|
||||
.aggregators(
|
||||
Lists.<AggregatorFactory>newArrayList(
|
||||
Iterables.concat(
|
||||
QueryRunnerTestHelper.commonAggregators,
|
||||
Lists.newArrayList(
|
||||
new MaxAggregatorFactory("maxIndex", "index"),
|
||||
new MinAggregatorFactory("minIndex", "index"),
|
||||
factory
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
.postAggregators(
|
||||
Arrays.<PostAggregator>asList(
|
||||
QueryRunnerTestHelper.addRowsIndexConstant,
|
||||
QueryRunnerTestHelper.dependentPostAgg,
|
||||
new QuantilePostAggregator("quantile", "apphisto", 0.5f)
|
||||
)
|
||||
)
|
||||
.build();
|
||||
|
||||
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||
new Result<TopNResultValue>(
|
||||
new DateTime("2011-01-12T00:00:00.000Z"),
|
||||
new TopNResultValue(
|
||||
Arrays.<Map<String, Object>>asList(
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "total_market")
|
||||
.put("rows", 186L)
|
||||
.put("index", 215679.82879638672D)
|
||||
.put("addRowsIndexConstant", 215866.82879638672D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 216053.82879638672D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
|
||||
.put("maxIndex", 1743.9217529296875D)
|
||||
.put("minIndex", 792.3260498046875D)
|
||||
.put("quantile", 1085.6775f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
554.4271240234375f,
|
||||
792.3260498046875f,
|
||||
1030.2249755859375f,
|
||||
1268.1239013671875f,
|
||||
1506.0228271484375f,
|
||||
1743.9217529296875f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
39.42073059082031D,
|
||||
103.29110717773438D,
|
||||
34.93659591674805D,
|
||||
8.351564407348633D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build(),
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "upfront")
|
||||
.put("rows", 186L)
|
||||
.put("index", 192046.1060180664D)
|
||||
.put("addRowsIndexConstant", 192233.1060180664D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 192420.1060180664D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
|
||||
.put("maxIndex", 1870.06103515625D)
|
||||
.put("minIndex", 545.9906005859375D)
|
||||
.put("quantile", 880.9881f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
214.97299194335938f,
|
||||
545.9906005859375f,
|
||||
877.0081787109375f,
|
||||
1208.0257568359375f,
|
||||
1539.0433349609375f,
|
||||
1870.06103515625f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
67.53287506103516D,
|
||||
72.22068786621094D,
|
||||
31.984678268432617D,
|
||||
14.261756896972656D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build(),
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "spot")
|
||||
.put("rows", 837L)
|
||||
.put("index", 95606.57232284546D)
|
||||
.put("addRowsIndexConstant", 96444.57232284546D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 97282.57232284546D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
|
||||
.put("maxIndex", 277.2735290527344D)
|
||||
.put("minIndex", 59.02102279663086D)
|
||||
.put("quantile", 101.78856f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
4.457897186279297f,
|
||||
59.02102279663086f,
|
||||
113.58415222167969f,
|
||||
168.14727783203125f,
|
||||
222.7104034423828f,
|
||||
277.2735290527344f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
462.4309997558594D,
|
||||
357.5404968261719D,
|
||||
15.022850036621094D,
|
||||
2.0056631565093994D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build()
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,588 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
public class ApproximateHistogramTest
|
||||
{
|
||||
static final float[] VALUES = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45};
|
||||
static final float[] VALUES2 = {23, 19, 10, 16, 36, 2, 1, 9, 32, 30, 45, 46};
|
||||
|
||||
static final float[] VALUES3 = {
|
||||
20, 16, 19, 27, 17, 20, 18, 20, 28, 14, 17, 21, 20, 21, 10, 25, 23, 17, 21, 18,
|
||||
14, 20, 18, 12, 19, 20, 23, 25, 15, 22, 14, 17, 15, 23, 23, 15, 27, 20, 17, 15
|
||||
};
|
||||
static final float[] VALUES4 = {
|
||||
27.489f, 3.085f, 3.722f, 66.875f, 30.998f, -8.193f, 5.395f, 5.109f, 10.944f, 54.75f,
|
||||
14.092f, 15.604f, 52.856f, 66.034f, 22.004f, -14.682f, -50.985f, 2.872f, 61.013f,
|
||||
-21.766f, 19.172f, 62.882f, 33.537f, 21.081f, 67.115f, 44.789f, 64.1f, 20.911f,
|
||||
-6.553f, 2.178f
|
||||
};
|
||||
static final float[] VALUES5 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
static final float[] VALUES6 = {1f, 1.5f, 2f, 2.5f, 3f, 3.5f, 4f, 4.5f, 5f, 5.5f, 6f, 6.5f, 7f, 7.5f, 8f, 8.5f, 9f, 9.5f, 10f};
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values, float lowerLimit, float upperLimit)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size, lowerLimit, upperLimit);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOffer() throws Exception
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
|
||||
// (2, 1), (9.5, 2), (19.33, 3), (32.67, 3), (45, 1)
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new long[]{1, 2, 3, 3, 1}, h.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("min value matches expexted min", 2, h.min(), 0);
|
||||
Assert.assertEquals("max value matches expexted max", 45, h.max(), 0);
|
||||
|
||||
Assert.assertEquals("bin count matches expected bin count", 5, h.binCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFold()
|
||||
{
|
||||
ApproximateHistogram merged = new ApproximateHistogram(0);
|
||||
ApproximateHistogram mergedFast = new ApproximateHistogram(0);
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(5);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
h1.offer(VALUES[i]);
|
||||
}
|
||||
for (int i = 5; i < VALUES.length; ++i) {
|
||||
h2.offer(VALUES[i]);
|
||||
}
|
||||
|
||||
merged.fold(h1);
|
||||
merged.fold(h2);
|
||||
mergedFast.foldFast(h1);
|
||||
mergedFast.foldFast(h2);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, merged.positions(), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{11.2f, 30.25f, 45f}, mergedFast.positions(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{1, 2, 3, 3, 1}, merged.bins()
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{5, 4, 1}, mergedFast.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("merged max matches expected value", 45f, merged.max(), 0.1f);
|
||||
Assert.assertEquals("mergedfast max matches expected value", 45f, mergedFast.max(), 0.1f);
|
||||
Assert.assertEquals("merged min matches expected value", 2f, merged.min(), 0.1f);
|
||||
Assert.assertEquals("mergedfast min matches expected value", 2f, mergedFast.min(), 0.1f);
|
||||
|
||||
// fold where merged bincount is less than total bincount
|
||||
ApproximateHistogram a = buildHistogram(10, new float[]{1, 2, 3, 4, 5, 6});
|
||||
ApproximateHistogram aFast = buildHistogram(10, new float[]{1, 2, 3, 4, 5, 6});
|
||||
ApproximateHistogram b = buildHistogram(5, new float[]{3, 4, 5, 6});
|
||||
|
||||
a.fold(b);
|
||||
aFast.foldFast(b);
|
||||
|
||||
Assert.assertEquals(
|
||||
new ApproximateHistogram(
|
||||
6,
|
||||
new float[]{1, 2, 3, 4, 5, 6, 0, 0, 0, 0},
|
||||
new long[]{1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
|
||||
1, 6
|
||||
), a
|
||||
);
|
||||
Assert.assertEquals(
|
||||
new ApproximateHistogram(
|
||||
6,
|
||||
new float[]{1, 2, 3, 4, 5, 6, 0, 0, 0, 0},
|
||||
new long[]{1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
|
||||
1, 6
|
||||
), aFast
|
||||
);
|
||||
|
||||
ApproximateHistogram h3 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4 = new ApproximateHistogram(10);
|
||||
for (float v : VALUES3) {
|
||||
h3.offer(v);
|
||||
}
|
||||
for (float v : VALUES4) {
|
||||
h4.offer(v);
|
||||
}
|
||||
h3.fold(h4);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{-50.98f, -21.77f, -9.81f, 3.73f, 13.72f, 20.1f, 29f, 44.79f, 53.8f, 64.67f},
|
||||
h3.positions(), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{1, 1, 3, 6, 12, 32, 6, 1, 2, 6}, h3.bins()
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFoldNothing() throws Exception
|
||||
{
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
|
||||
h1.fold(h2);
|
||||
h1.foldFast(h2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFoldNothing2() throws Exception
|
||||
{
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h1Fast = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h3 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4Fast = new ApproximateHistogram(10);
|
||||
for (float v : VALUES3) {
|
||||
h3.offer(v);
|
||||
h4.offer(v);
|
||||
h4Fast.offer(v);
|
||||
}
|
||||
|
||||
h1.fold(h3);
|
||||
h4.fold(h2);
|
||||
h1Fast.foldFast(h3);
|
||||
h4Fast.foldFast(h2);
|
||||
|
||||
Assert.assertEquals(h3, h1);
|
||||
Assert.assertEquals(h4, h3);
|
||||
Assert.assertEquals(h3, h1Fast);
|
||||
Assert.assertEquals(h3, h4Fast);
|
||||
}
|
||||
|
||||
//@Test
|
||||
public void testFoldSpeed()
|
||||
{
|
||||
final int combinedHistSize = 200;
|
||||
final int histSize = 50;
|
||||
final int numRand = 10000;
|
||||
ApproximateHistogram h = new ApproximateHistogram(combinedHistSize);
|
||||
Random rand = new Random(0);
|
||||
//for(int i = 0; i < 200; ++i) h.offer((float)(rand.nextGaussian() * 50.0));
|
||||
long tFold = 0;
|
||||
int count = 5000000;
|
||||
Float[] randNums = new Float[numRand];
|
||||
for (int i = 0; i < numRand; i++) {
|
||||
randNums[i] = (float) rand.nextGaussian();
|
||||
}
|
||||
|
||||
List<ApproximateHistogram> randHist = Lists.newLinkedList();
|
||||
Iterator<ApproximateHistogram> it = Iterators.cycle(randHist);
|
||||
|
||||
for(int k = 0; k < numRand; ++k) {
|
||||
ApproximateHistogram tmp = new ApproximateHistogram(histSize);
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
tmp.offer((float) (rand.nextGaussian() + (double)k));
|
||||
}
|
||||
randHist.add(tmp);
|
||||
}
|
||||
|
||||
float[] mergeBufferP = new float[combinedHistSize * 2];
|
||||
long[] mergeBufferB = new long[combinedHistSize * 2];
|
||||
float[] mergeBufferD = new float[combinedHistSize * 2];
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
ApproximateHistogram tmp = it.next();
|
||||
|
||||
long t0 = System.nanoTime();
|
||||
//h.fold(tmp, mergeBufferP, mergeBufferB, mergeBufferD);
|
||||
h.foldFast(tmp, mergeBufferP, mergeBufferB);
|
||||
tFold += System.nanoTime() - t0;
|
||||
}
|
||||
|
||||
System.out.println(String.format("Average folds per second : %f", (double) count / (double) tFold * 1e9));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSum()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
|
||||
Assert.assertEquals(0.0f, h.sum(0), 0.01);
|
||||
Assert.assertEquals(1.0f, h.sum(2), 0.01);
|
||||
Assert.assertEquals(1.16f, h.sum(5), 0.01);
|
||||
Assert.assertEquals(3.28f, h.sum(15), 0.01);
|
||||
Assert.assertEquals(VALUES.length, h.sum(45), 0.01);
|
||||
Assert.assertEquals(VALUES.length, h.sum(46), 0.01);
|
||||
|
||||
ApproximateHistogram h2 = buildHistogram(5, VALUES2);
|
||||
|
||||
Assert.assertEquals(0.0f, h2.sum(0), 0.01);
|
||||
Assert.assertEquals(0.0f, h2.sum(1f), 0.01);
|
||||
Assert.assertEquals(1.0f, h2.sum(1.5f), 0.01);
|
||||
Assert.assertEquals(1.125f, h2.sum(2f), 0.001);
|
||||
Assert.assertEquals(2.0625f, h2.sum(5.75f), 0.001);
|
||||
Assert.assertEquals(3.0f, h2.sum(9.5f), 0.01);
|
||||
Assert.assertEquals(11.0f, h2.sum(45.5f), 0.01);
|
||||
Assert.assertEquals(12.0f, h2.sum(46f), 0.01);
|
||||
Assert.assertEquals(12.0f, h2.sum(47f), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeCompact()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(50).fold(h);
|
||||
Assert.assertEquals(h2, ApproximateHistogram.fromBytes(h2.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeDense()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
ByteBuffer buf = ByteBuffer.allocate(h.getDenseStorageSize());
|
||||
h.toBytesDense(buf);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(buf.array()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeSparse()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
ByteBuffer buf = ByteBuffer.allocate(h.getSparseStorageSize());
|
||||
h.toBytesSparse(buf);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(buf.array()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeCompactExact()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(50, new float[]{1f, 2f, 3f, 4f, 5f});
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
h = buildHistogram(5, new float[]{1f, 2f, 3f});
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
h = new ApproximateHistogram(40).fold(h);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeEmpty()
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(50);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileSmaller()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(20, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{3.33f, 6.67f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.5f, 5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4f, 6f, 8f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileEqualSize()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(10, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{3.33f, 6.67f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.5f, 5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4f, 6f, 8f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileBigger()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{4.5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.83f, 6.17f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4.5f, 7f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1.5f, 3.5f, 5.5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileBigger2()
|
||||
{
|
||||
float[] thousand = new float[1000];
|
||||
for (int i = 1; i <= 1000; ++i) {
|
||||
thousand[i - 1] = i;
|
||||
}
|
||||
ApproximateHistogram h = buildHistogram(100, thousand);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{493.5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{327.5f, 662f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{244.5f, 493.5f, 746f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{96.5f, 196.53f, 294.5f, 395.5f, 493.5f, 597f, 696f, 795f, 895.25f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLimitSum()
|
||||
{
|
||||
final float lowerLimit = 0f;
|
||||
final float upperLimit = 10f;
|
||||
|
||||
ApproximateHistogram h = buildHistogram(15, VALUES6, lowerLimit, upperLimit);
|
||||
|
||||
for (int i = 1; i <= 20; ++i) {
|
||||
ApproximateHistogram hLow = new ApproximateHistogram(5);
|
||||
ApproximateHistogram hHigh = new ApproximateHistogram(5);
|
||||
hLow.offer(lowerLimit - i);
|
||||
hHigh.offer(upperLimit + i);
|
||||
h.foldFast(hLow);
|
||||
h.foldFast(hHigh);
|
||||
}
|
||||
|
||||
Assert.assertEquals(20f, h.sum(lowerLimit), .7f);
|
||||
Assert.assertEquals(VALUES6.length + 20f, h.sum(upperLimit), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets()
|
||||
{
|
||||
final float[] values = new float[]{-5f, .01f, .02f, .06f, .12f, 1f, 2f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(.05f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{1f, 2f, 1f, 1f, 0f, 1f, 1f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-5.05f, 0f, .05f, .1f, .15f, .95f, 1f, 2f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets2()
|
||||
{
|
||||
final float[] values = new float[]{-5f, .01f, .02f, .06f, .12f, .94f, 1f, 2f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(.05f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{1f, 2f, 1f, 1f, 0f, 1f, 1f, 1f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-5.05f, 0f, .05f, .1f, .15f, .9f, .95f, 1f, 2.05f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets3()
|
||||
{
|
||||
final float[] values = new float[]{0f, 0f, .02f, .06f, .12f, .94f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(1f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{2f, 4f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-1f, 0f, 1f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets4()
|
||||
{
|
||||
final float[] values = new float[]{0f, 0f, 0.01f, 0.51f, 0.6f,0.8f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0.5f,1f);
|
||||
Histogram h3 = h.toHistogram(0.2f,0);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected counts match actual counts",
|
||||
new double[]{3f,2f,1f},
|
||||
h3.getCounts(),
|
||||
0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-0.2f,0.5f,0.7f,0.9f},
|
||||
h3.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test public void testBuckets5()
|
||||
{
|
||||
final float[] values = new float[]{0.1f,0.5f,0.6f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f,1f);
|
||||
Histogram h4 = h.toHistogram(0.5f,0);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected counts match actual counts",
|
||||
new double[]{2,1},
|
||||
h4.getCounts(),
|
||||
0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected breaks match actual breaks",
|
||||
new double[]{0f,0.5f,1f},
|
||||
h4.getBreaks(),
|
||||
0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test public void testEmptyHistogram() {
|
||||
ApproximateHistogram h = new ApproximateHistogram(50);
|
||||
Assert.assertArrayEquals(
|
||||
new float[]{Float.NaN, Float.NaN},
|
||||
h.getQuantiles(new float[]{0.8f, 0.9f}),
|
||||
1e-9f
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.druid.jackson.DefaultObjectMapper;
|
||||
import org.hamcrest.CoreMatchers;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
public class QuantilesTest
|
||||
{
|
||||
@Test
|
||||
public void testSerialization() throws Exception
|
||||
{
|
||||
ObjectMapper mapper = new DefaultObjectMapper();
|
||||
|
||||
float[] probabilities = new float[]{0.25f, 0.5f, 0.75f};
|
||||
float[] quantiles = new float[]{0.25f, 0.5f, 0.75f};
|
||||
float min = 0f;
|
||||
float max = 4f;
|
||||
|
||||
String theString = mapper.writeValueAsString(
|
||||
new Quantiles(probabilities, quantiles, min, max)
|
||||
);
|
||||
|
||||
Object theObject = mapper.readValue(theString, Object.class);
|
||||
Assert.assertThat(theObject, CoreMatchers.instanceOf(LinkedHashMap.class));
|
||||
|
||||
LinkedHashMap theMap = (LinkedHashMap) theObject;
|
||||
|
||||
ArrayList theProbabilities = (ArrayList<Float>) theMap.get("probabilities");
|
||||
|
||||
Assert.assertEquals(probabilities.length, theProbabilities.size());
|
||||
for (int i = 0; i < theProbabilities.size(); ++i) {
|
||||
Assert.assertEquals(probabilities[i], ((Number) theProbabilities.get(i)).floatValue(), 0.0001f);
|
||||
}
|
||||
|
||||
ArrayList theQuantiles = (ArrayList<Float>) theMap.get("quantiles");
|
||||
|
||||
Assert.assertEquals(quantiles.length, theQuantiles.size());
|
||||
for (int i = 0; i < theQuantiles.size(); ++i) {
|
||||
Assert.assertEquals(quantiles[i], ((Number) theQuantiles.get(i)).floatValue(), 0.0001f);
|
||||
}
|
||||
|
||||
Assert.assertEquals(
|
||||
"serialized min. matches expected min.",
|
||||
min,
|
||||
((Number) theMap.get("min")).floatValue(),
|
||||
0.0001f
|
||||
);
|
||||
Assert.assertEquals(
|
||||
"serialized max. matches expected max.",
|
||||
max,
|
||||
((Number) theMap.get("max")).floatValue(),
|
||||
0.0001f
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -179,7 +179,16 @@ public class DetermineHashedPartitionsJob implements Jobby
|
|||
actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
|
||||
} else {
|
||||
for (int i = 0; i < numberOfShards; ++i) {
|
||||
actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards), shardCount++));
|
||||
actualSpecs.add(
|
||||
new HadoopyShardSpec(
|
||||
new HashBasedNumberedShardSpec(
|
||||
i,
|
||||
numberOfShards,
|
||||
HadoopDruidIndexerConfig.jsonMapper
|
||||
),
|
||||
shardCount++
|
||||
)
|
||||
);
|
||||
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ public class HadoopDruidDetermineConfigurationJob implements Jobby
|
|||
for (int i = 0; i < shardsPerInterval; i++) {
|
||||
specs.add(
|
||||
new HadoopyShardSpec(
|
||||
new HashBasedNumberedShardSpec(i, shardsPerInterval),
|
||||
new HashBasedNumberedShardSpec(i, shardsPerInterval, HadoopDruidIndexerConfig.jsonMapper),
|
||||
shardCount++
|
||||
)
|
||||
);
|
||||
|
|
|
@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
|
|||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.api.client.util.Maps;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Optional;
|
||||
|
@ -30,25 +31,28 @@ import com.google.common.base.Preconditions;
|
|||
import com.google.common.base.Splitter;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.inject.Binder;
|
||||
import com.google.inject.Injector;
|
||||
import com.google.inject.Key;
|
||||
import com.google.inject.Module;
|
||||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.guava.FunctionalIterable;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.common.utils.JodaUtils;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.data.input.impl.StringInputRowParser;
|
||||
import io.druid.guice.GuiceInjectors;
|
||||
import io.druid.guice.JsonConfigProvider;
|
||||
import io.druid.guice.annotations.Self;
|
||||
import io.druid.indexer.partitions.PartitionsSpec;
|
||||
import io.druid.indexer.path.PathSpec;
|
||||
import io.druid.initialization.Initialization;
|
||||
import io.druid.segment.column.ColumnConfig;
|
||||
import io.druid.segment.indexing.granularity.GranularitySpec;
|
||||
import io.druid.server.DruidNode;
|
||||
import io.druid.timeline.DataSegment;
|
||||
import io.druid.timeline.partition.ShardSpec;
|
||||
import io.druid.timeline.partition.ShardSpecLookup;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
@ -81,7 +85,7 @@ public class HadoopDruidIndexerConfig
|
|||
|
||||
static {
|
||||
injector = Initialization.makeInjectorWithModules(
|
||||
Initialization.makeStartupInjector(),
|
||||
GuiceInjectors.makeStartupInjector(),
|
||||
ImmutableList.<Object>of(
|
||||
new Module()
|
||||
{
|
||||
|
@ -166,14 +170,41 @@ public class HadoopDruidIndexerConfig
|
|||
|
||||
private volatile HadoopIngestionSpec schema;
|
||||
private volatile PathSpec pathSpec;
|
||||
private volatile ColumnConfig columnConfig;
|
||||
private volatile Map<DateTime,ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
|
||||
private volatile Map<ShardSpec, HadoopyShardSpec> hadoopShardSpecLookup = Maps.newHashMap();
|
||||
|
||||
/**
 * Creates the config from an ingestion spec and pre-computes the shard lookups.
 *
 * For every time bucket in the tuning config that has a non-empty shard list,
 * a lookup obtained from the first shard's actual spec is stored in
 * {@code shardSpecLookups}, and every actual spec is mapped back to its
 * {@link HadoopyShardSpec} wrapper in {@code hadoopShardSpecLookup}.
 *
 * @param schema the ingestion spec, bound from the "schema" JSON property
 */
@JsonCreator
public HadoopDruidIndexerConfig(
    final @JsonProperty("schema") HadoopIngestionSpec schema
)
{
  // NOTE(review): a statement `this.columnConfig = columnConfig;` used to stand here. There is
  // no constructor parameter of that name, so it assigned the field to itself and had no
  // effect; it was removed. The columnConfig field is therefore not set by this constructor.
  // TODO confirm where it is supposed to be populated.
  this.schema = schema;
  this.pathSpec = jsonMapper.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);

  // Unwraps the actual shard spec from its Hadoop wrapper.
  final Function<HadoopyShardSpec, ShardSpec> toActualSpec = new Function<HadoopyShardSpec, ShardSpec>()
  {
    @Override
    public ShardSpec apply(HadoopyShardSpec input)
    {
      return input.getActualSpec();
    }
  };

  for (Map.Entry<DateTime, List<HadoopyShardSpec>> entry : schema.getTuningConfig().getShardSpecs().entrySet()) {
    final List<HadoopyShardSpec> shards = entry.getValue();
    if (shards == null || shards.isEmpty()) {
      // Nothing to index for this time bucket.
      continue;
    }

    // The first shard's spec builds the lookup covering all shards of the bucket.
    final ShardSpec actualSpec = shards.get(0).getActualSpec();
    shardSpecLookups.put(entry.getKey(), actualSpec.getLookup(Lists.transform(shards, toActualSpec)));

    for (HadoopyShardSpec hadoopyShardSpec : shards) {
      hadoopShardSpecLookup.put(hadoopyShardSpec.getActualSpec(), hadoopyShardSpec);
    }
  }
}
|
||||
|
||||
@JsonProperty
|
||||
|
@ -182,6 +213,11 @@ public class HadoopDruidIndexerConfig
|
|||
return schema;
|
||||
}
|
||||
|
||||
/**
 * Returns the value of the {@code columnConfig} field.
 *
 * NOTE(review): the constructor visible in this file takes no ColumnConfig
 * argument, so this may still be null when called; confirm where the field
 * is populated.
 */
public ColumnConfig getColumnConfig()
|
||||
{
|
||||
return columnConfig;
|
||||
}
|
||||
|
||||
public String getDataSource()
|
||||
{
|
||||
return schema.getDataSchema().getDataSource();
|
||||
|
@ -297,25 +333,17 @@ public class HadoopDruidIndexerConfig
|
|||
return Optional.absent();
|
||||
}
|
||||
|
||||
final List<HadoopyShardSpec> shards = schema.getTuningConfig().getShardSpecs().get(timeBucket.get().getStart());
|
||||
if (shards == null || shards.isEmpty()) {
|
||||
return Optional.absent();
|
||||
}
|
||||
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart()).getShardSpec(inputRow);
|
||||
final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(actualSpec);
|
||||
|
||||
for (final HadoopyShardSpec hadoopyShardSpec : shards) {
|
||||
final ShardSpec actualSpec = hadoopyShardSpec.getActualSpec();
|
||||
if (actualSpec.isInChunk(inputRow)) {
|
||||
return Optional.of(
|
||||
new Bucket(
|
||||
hadoopyShardSpec.getShardNum(),
|
||||
timeBucket.get().getStart(),
|
||||
actualSpec.getPartitionNum()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
return Optional.of(
|
||||
new Bucket(
|
||||
hadoopyShardSpec.getShardNum(),
|
||||
timeBucket.get().getStart(),
|
||||
actualSpec.getPartitionNum()
|
||||
)
|
||||
);
|
||||
|
||||
throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards);
|
||||
}
|
||||
|
||||
public Optional<Set<Interval>> getSegmentGranularIntervals()
|
||||
|
|
|
@ -31,6 +31,7 @@ import com.google.common.io.Closeables;
|
|||
import com.google.common.primitives.Longs;
|
||||
import com.metamx.common.IAE;
|
||||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.guava.CloseQuietly;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.data.input.impl.StringInputRowParser;
|
||||
|
@ -425,7 +426,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
if (caughtException == null) {
|
||||
Closeables.close(out, false);
|
||||
} else {
|
||||
Closeables.closeQuietly(out);
|
||||
CloseQuietly.close(out);
|
||||
throw Throwables.propagate(caughtException);
|
||||
}
|
||||
}
|
||||
|
@ -605,7 +606,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
}
|
||||
}
|
||||
finally {
|
||||
Closeables.closeQuietly(in);
|
||||
CloseQuietly.close(in);
|
||||
}
|
||||
out.closeEntry();
|
||||
context.progress();
|
||||
|
|
|
@ -45,7 +45,6 @@ public class HadoopDruidIndexerConfigTest
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void shouldMakeHDFSCompliantSegmentOutputPath()
|
||||
{
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.122-SNAPSHOT</version>
|
||||
<version>0.6.139-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
@ -24,8 +24,9 @@ import com.fasterxml.jackson.databind.jsontype.NamedType;
|
|||
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.inject.Binder;
|
||||
import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory;
|
||||
import io.druid.indexing.firehose.IngestSegmentFirehoseFactory;
|
||||
import io.druid.initialization.DruidModule;
|
||||
import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -37,7 +38,8 @@ public class IndexingServiceFirehoseModule implements DruidModule
|
|||
return ImmutableList.<Module>of(
|
||||
new SimpleModule("IndexingServiceFirehoseModule")
|
||||
.registerSubtypes(
|
||||
new NamedType(EventReceiverFirehoseFactory.class, "receiver")
|
||||
new NamedType(EventReceiverFirehoseFactory.class, "receiver"),
|
||||
new NamedType(IngestSegmentFirehoseFactory.class, "ingestSegment")
|
||||
)
|
||||
);
|
||||
}
|
||||
|
|
|
@ -49,7 +49,6 @@ import java.io.IOException;
|
|||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
|
||||
/**
|
||||
|
@ -187,7 +186,8 @@ public class TaskToolbox
|
|||
return retVal;
|
||||
}
|
||||
|
||||
public void pushSegments(Iterable<DataSegment> segments) throws IOException {
|
||||
public void pushSegments(Iterable<DataSegment> segments) throws IOException
|
||||
{
|
||||
// Request segment pushes for each set
|
||||
final Multimap<Interval, DataSegment> segmentMultimap = Multimaps.index(
|
||||
segments,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue