mirror of https://github.com/apache/druid.git
Merge pull request #625 from metamx/approx-histo
Approximate Histograms and Quantiles
This commit is contained in:
commit
a16bdf9759
|
@ -155,8 +155,33 @@ Determine the number of distinct are assigned to.
|
|||
|
||||
### HyperUnique aggregator
|
||||
|
||||
Uses [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) to compute the estimated cardinality of a dimension that has been aggregated as a hyperUnique metric at indexing time.
|
||||
Uses [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) to compute the estimated cardinality of a dimension that has been aggregated as a "hyperUnique" metric at indexing time.
|
||||
|
||||
```json
|
||||
{ "type" : "hyperUnique", "name" : <output_name>, "fieldName" : <metric_name> }
|
||||
```
|
||||
|
||||
### ApproxHistogram aggregator
|
||||
|
||||
This aggregator computes approximate histograms using the algorithm described in [http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf).
|
||||
|
||||
To use this feature, an "approxHistogram" aggregator must be included at indexing time. The ingestion aggregator can only be applied to numeric values. To query for results, an "approxHistogramFold" aggregator must be included in the query.
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "approxHistogram(ingestion), approxHistogramFold(query)",
|
||||
"name" : <output_name>,
|
||||
"fieldName" : <metric_name>,
|
||||
"resolution" : <integer>,
|
||||
"numBuckets" : <integer>,
|
||||
"lowerLimit" : <float>,
|
||||
"upperLimit" : <float>
|
||||
}
|
||||
```
|
||||
|
||||
|Property|Description|Default|
|
||||
|--------|-----------|-------|
|
||||
|`resolution`|Number of centroids (data points) to store. A higher resolution gives more accurate results but makes computation slower.|50|
|
||||
|`numBuckets`|Number of output buckets for the resulting histogram.|7|
|
||||
|`lowerLimit`/`upperLimit`|Restrict the approximation to the given range. The values outside this range will be aggregated into two centroids. Counts of values outside this range are still maintained. |-INF/+INF|
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ Example JavaScript aggregator:
|
|||
"function": "function(delta, total) { return 100 * Math.abs(delta) / total; }"
|
||||
}
|
||||
```
|
||||
### `hyperUniqueCardinality` post-aggregator
|
||||
### HyperUnique Cardinality post-aggregator
|
||||
|
||||
The hyperUniqueCardinality post aggregator is used to wrap a hyperUnique object such that it can be used in post aggregations.
|
||||
|
||||
|
@ -90,8 +90,7 @@ It can be used in a sample calculation as so:
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
### Example Usage
|
||||
#### Example Usage
|
||||
|
||||
In this example, let’s calculate a simple percentage using post aggregators. Let’s imagine our data set has a metric called "total".
|
||||
|
||||
|
@ -122,5 +121,70 @@ The format of the query JSON is as follows:
|
|||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
### Approximate Histogram post-aggregators
|
||||
|
||||
These post-aggregators are used to transform opaque approximate histogram objects
|
||||
into actual histogram representations, and to compute various distribution metrics.
|
||||
|
||||
#### equal buckets post-aggregator
|
||||
|
||||
Computes a visual representation of the approximate histogram with a given number of equal-sized bins
|
||||
|
||||
```json
|
||||
{ "type" : "equalBuckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"numBuckets" : <count> }
|
||||
```
|
||||
|
||||
#### buckets post-aggregator
|
||||
|
||||
Computes a visual representation of the approximate histogram given an initial breakpoint (`offset`) and a bucket size (`bucketSize`).
|
||||
|
||||
```json
|
||||
{ "type" : "buckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"bucketSize" : <bucket_size>, "offset" : <offset> }
|
||||
```
|
||||
|
||||
#### custom buckets post-aggregator
|
||||
|
||||
Computes a visual representation of the approximate histogram with bins laid out according to the given breaks
|
||||
|
||||
```json
|
||||
{ "type" : "customBuckets", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"breaks" : [ <value>, <value>, ... ] }
|
||||
```
|
||||
|
||||
#### min post-aggregator
|
||||
|
||||
Returns the minimum value of the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "min", "name" : <output_name>, "fieldName" : <aggregator_name> }
|
||||
```
|
||||
|
||||
#### max post-aggregator
|
||||
|
||||
Returns the maximum value of the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "max", "name" : <output_name>, "fieldName" : <aggregator_name> }
|
||||
```
|
||||
|
||||
#### quantile post-aggregator
|
||||
|
||||
Computes a single quantile based on the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "quantile", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"probability" : <quantile> }
|
||||
```
|
||||
|
||||
#### quantiles post-aggregator
|
||||
|
||||
Computes an array of quantiles based on the underlying approximate histogram aggregator
|
||||
|
||||
```json
|
||||
{ "type" : "quantiles", "name" : <output_name>, "fieldName" : <aggregator_name>,
|
||||
"probabilities" : [ <quantile>, <quantile>, ... ] }
|
||||
```
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Druid - a distributed column store.
|
||||
~ Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
~
|
||||
~ This program is free software; you can redistribute it and/or
|
||||
~ modify it under the terms of the GNU General Public License
|
||||
~ as published by the Free Software Foundation; either version 2
|
||||
~ of the License, or (at your option) any later version.
|
||||
~
|
||||
~ This program is distributed in the hope that it will be useful,
|
||||
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
~ GNU General Public License for more details.
|
||||
~
|
||||
~ You should have received a copy of the GNU General Public License
|
||||
~ along with this program; if not, write to the Free Software
|
||||
~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>io.druid.extensions</groupId>
|
||||
<artifactId>druid-histogram</artifactId>
|
||||
<name>druid-histogram</name>
|
||||
<description>druid-histogram</description>
|
||||
|
||||
<parent>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>0.6.129-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid-processing</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Tests -->
|
||||
<dependency>
|
||||
<groupId>io.druid</groupId>
|
||||
<artifactId>druid-processing</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
|
||||
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.primitives.Longs;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.segment.FloatColumnSelector;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class ApproximateHistogramAggregator implements Aggregator
|
||||
{
|
||||
public static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Longs.compare(((ApproximateHistogram) o).count(), ((ApproximateHistogram) o1).count());
|
||||
}
|
||||
};
|
||||
|
||||
static Object combineHistograms(Object lhs, Object rhs)
|
||||
{
|
||||
return ((ApproximateHistogram) lhs).foldFast((ApproximateHistogram) rhs);
|
||||
}
|
||||
|
||||
private final String name;
|
||||
private final FloatColumnSelector selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
private ApproximateHistogram histogram;
|
||||
|
||||
public ApproximateHistogramAggregator(
|
||||
String name,
|
||||
FloatColumnSelector selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate()
|
||||
{
|
||||
histogram.offer(selector.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get()
|
||||
{
|
||||
return histogram;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat()
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.primitives.Floats;
|
||||
import com.google.common.primitives.Ints;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ColumnSelectorFactory;
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@JsonTypeName("approxHistogram")
|
||||
public class ApproximateHistogramAggregatorFactory implements AggregatorFactory
|
||||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x8;
|
||||
|
||||
protected final String name;
|
||||
protected final String fieldName;
|
||||
|
||||
protected final int resolution;
|
||||
protected final int numBuckets;
|
||||
|
||||
protected final float lowerLimit;
|
||||
protected final float upperLimit;
|
||||
|
||||
@JsonCreator
|
||||
public ApproximateHistogramAggregatorFactory(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("resolution") Integer resolution,
|
||||
@JsonProperty("numBuckets") Integer numBuckets,
|
||||
@JsonProperty("lowerLimit") Float lowerLimit,
|
||||
@JsonProperty("upperLimit") Float upperLimit
|
||||
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.fieldName = fieldName.toLowerCase();
|
||||
this.resolution = resolution == null ? ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE : resolution;
|
||||
this.numBuckets = numBuckets == null ? ApproximateHistogram.DEFAULT_BUCKET_SIZE : numBuckets;
|
||||
this.lowerLimit = lowerLimit == null ? Float.NEGATIVE_INFINITY : lowerLimit;
|
||||
this.upperLimit = upperLimit == null ? Float.POSITIVE_INFINITY : upperLimit;
|
||||
|
||||
Preconditions.checkArgument(this.resolution > 0, "resolution must be greater than 1");
|
||||
Preconditions.checkArgument(this.numBuckets > 0, "numBuckets must be greater than 1");
|
||||
Preconditions.checkArgument(this.upperLimit > this.lowerLimit, "upperLimit must be greater than lowerLimit");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Aggregator factorize(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
return new ApproximateHistogramAggregator(
|
||||
name,
|
||||
metricFactory.makeFloatColumnSelector(fieldName),
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
return new ApproximateHistogramBufferAggregator(
|
||||
metricFactory.makeFloatColumnSelector(fieldName),
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return ApproximateHistogramAggregator.COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object combine(Object lhs, Object rhs)
|
||||
{
|
||||
return ApproximateHistogramAggregator.combineHistograms(lhs, rhs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getCombiningFactory()
|
||||
{
|
||||
return new ApproximateHistogramAggregatorFactory(name, name, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
return Arrays.<AggregatorFactory>asList(
|
||||
new ApproximateHistogramAggregatorFactory(
|
||||
fieldName,
|
||||
fieldName,
|
||||
resolution,
|
||||
numBuckets,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object deserialize(Object object)
|
||||
{
|
||||
if (object instanceof byte[]) {
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes((byte[]) object);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else if (object instanceof ByteBuffer) {
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes((ByteBuffer) object);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else if (object instanceof String) {
|
||||
byte[] bytes = Base64.decodeBase64(((String) object).getBytes(Charsets.UTF_8));
|
||||
final ApproximateHistogram ah = ApproximateHistogram.fromBytes(bytes);
|
||||
ah.setLowerLimit(lowerLimit);
|
||||
ah.setUpperLimit(upperLimit);
|
||||
|
||||
return ah;
|
||||
} else {
|
||||
return object;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object finalizeComputation(Object object)
|
||||
{
|
||||
return ((ApproximateHistogram) object).toHistogram(numBuckets);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getFieldName()
|
||||
{
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getResolution()
|
||||
{
|
||||
return resolution;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getLowerLimit()
|
||||
{
|
||||
return lowerLimit;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getUpperLimit()
|
||||
{
|
||||
return upperLimit;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getNumBuckets()
|
||||
{
|
||||
return numBuckets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> requiredFields()
|
||||
{
|
||||
return Arrays.asList(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
byte[] fieldNameBytes = fieldName.getBytes(Charsets.UTF_8);
|
||||
return ByteBuffer.allocate(1 + fieldNameBytes.length + Ints.BYTES * 2 + Floats.BYTES * 2)
|
||||
.put(CACHE_TYPE_ID)
|
||||
.put(fieldNameBytes)
|
||||
.putInt(resolution)
|
||||
.putInt(numBuckets)
|
||||
.putFloat(lowerLimit)
|
||||
.putFloat(upperLimit).array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTypeName()
|
||||
{
|
||||
return "approximateHistogram";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxIntermediateSize()
|
||||
{
|
||||
return new ApproximateHistogram(resolution).getMaxStorageSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getAggregatorStartValue()
|
||||
{
|
||||
return new ApproximateHistogram(resolution);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "ApproximateHistogramAggregatorFactory{" +
|
||||
"name='" + name + '\'' +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
", resolution=" + resolution +
|
||||
", numBuckets=" + numBuckets +
|
||||
", lowerLimit=" + lowerLimit +
|
||||
", upperLimit=" + upperLimit +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.FloatColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramBufferAggregator implements BufferAggregator
|
||||
{
|
||||
private final FloatColumnSelector selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
public ApproximateHistogramBufferAggregator(FloatColumnSelector selector, int resolution, float lowerLimit, float upperLimit)
|
||||
{
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
mutationBuffer.putInt(resolution);
|
||||
mutationBuffer.putInt(0); //initial binCount
|
||||
for (int i = 0; i < resolution; ++i) {
|
||||
mutationBuffer.putFloat(0f);
|
||||
}
|
||||
for (int i = 0; i < resolution; ++i) {
|
||||
mutationBuffer.putLong(0L);
|
||||
}
|
||||
|
||||
// min
|
||||
mutationBuffer.putFloat(Float.POSITIVE_INFINITY);
|
||||
// max
|
||||
mutationBuffer.putFloat(Float.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
h0.offer(selector.get());
|
||||
|
||||
mutationBuffer.position(position);
|
||||
h0.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
return ApproximateHistogram.fromBytes(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat(ByteBuffer buf, int position)
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramBufferAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.databind.Module;
|
||||
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.inject.Binder;
|
||||
import io.druid.initialization.DruidModule;
|
||||
import io.druid.segment.serde.ComplexMetrics;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class ApproximateHistogramDruidModule implements DruidModule
|
||||
{
|
||||
@Override
|
||||
public List<? extends Module> getJacksonModules()
|
||||
{
|
||||
return ImmutableList.of(
|
||||
new SimpleModule().registerSubtypes(
|
||||
ApproximateHistogramFoldingAggregatorFactory.class,
|
||||
ApproximateHistogramAggregatorFactory.class,
|
||||
EqualBucketsPostAggregator.class,
|
||||
CustomBucketsPostAggregator.class,
|
||||
BucketsPostAggregator.class,
|
||||
QuantilesPostAggregator.class,
|
||||
QuantilePostAggregator.class,
|
||||
MinPostAggregator.class,
|
||||
MaxPostAggregator.class
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(Binder binder)
|
||||
{
|
||||
if (ComplexMetrics.getSerdeForType("approximateHistogram") == null) {
|
||||
ComplexMetrics.registerSerde("approximateHistogram", new ApproximateHistogramFoldingSerde());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
public class ApproximateHistogramFoldingAggregator implements Aggregator
|
||||
{
|
||||
private final String name;
|
||||
private final ObjectColumnSelector<ApproximateHistogram> selector;
|
||||
private final int resolution;
|
||||
private final float lowerLimit;
|
||||
private final float upperLimit;
|
||||
|
||||
private ApproximateHistogram histogram;
|
||||
private float[] tmpBufferP;
|
||||
private long[] tmpBufferB;
|
||||
|
||||
public ApproximateHistogramFoldingAggregator(
|
||||
String name,
|
||||
ObjectColumnSelector<ApproximateHistogram> selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
|
||||
tmpBufferP = new float[resolution];
|
||||
tmpBufferB = new long[resolution];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate()
|
||||
{
|
||||
ApproximateHistogram h = selector.get();
|
||||
if (h == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (h.binCount() + histogram.binCount() <= tmpBufferB.length) {
|
||||
histogram.foldFast(h, tmpBufferP, tmpBufferB);
|
||||
} else {
|
||||
histogram.foldFast(h);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset()
|
||||
{
|
||||
this.histogram = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get()
|
||||
{
|
||||
return histogram;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat()
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.primitives.Floats;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.metamx.common.IAE;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ColumnSelectorFactory;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
@JsonTypeName("approxHistogramFold")
|
||||
public class ApproximateHistogramFoldingAggregatorFactory extends ApproximateHistogramAggregatorFactory
|
||||
{
|
||||
private static final byte CACHE_TYPE_ID = 0x9;
|
||||
|
||||
@JsonCreator
|
||||
public ApproximateHistogramFoldingAggregatorFactory(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("resolution") Integer resolution,
|
||||
@JsonProperty("numBuckets") Integer numBuckets,
|
||||
@JsonProperty("lowerLimit") Float lowerLimit,
|
||||
@JsonProperty("upperLimit") Float upperLimit
|
||||
)
|
||||
{
|
||||
super(name, fieldName, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Aggregator factorize(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
ObjectColumnSelector selector = metricFactory.makeObjectColumnSelector(fieldName);
|
||||
|
||||
if (selector == null) {
|
||||
// gracefully handle undefined metrics
|
||||
|
||||
selector = new ObjectColumnSelector<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> classOfObject()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram get()
|
||||
{
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (ApproximateHistogram.class.isAssignableFrom(selector.classOfObject())) {
|
||||
return new ApproximateHistogramFoldingAggregator(
|
||||
name,
|
||||
selector,
|
||||
resolution,
|
||||
lowerLimit,
|
||||
upperLimit
|
||||
);
|
||||
}
|
||||
|
||||
throw new IAE(
|
||||
"Incompatible type for metric[%s], expected a ApproximateHistogram, got a %s",
|
||||
fieldName,
|
||||
selector.classOfObject()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
|
||||
{
|
||||
ObjectColumnSelector selector = metricFactory.makeObjectColumnSelector(fieldName);
|
||||
|
||||
if (selector == null) {
|
||||
// gracefully handle undefined metrics
|
||||
|
||||
selector = new ObjectColumnSelector<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> classOfObject()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram get()
|
||||
{
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (ApproximateHistogram.class.isAssignableFrom(selector.classOfObject())) {
|
||||
return new ApproximateHistogramFoldingBufferAggregator(selector, resolution, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
throw new IAE(
|
||||
"Incompatible type for metric[%s], expected a ApproximateHistogram, got a %s",
|
||||
fieldName,
|
||||
selector.classOfObject()
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getCombiningFactory()
|
||||
{
|
||||
return new ApproximateHistogramFoldingAggregatorFactory(name, name, resolution, numBuckets, lowerLimit, upperLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
byte[] fieldNameBytes = fieldName.getBytes(Charsets.UTF_8);
|
||||
return ByteBuffer.allocate(1 + fieldNameBytes.length + Ints.BYTES * 2 + Floats.BYTES * 2)
|
||||
.put(CACHE_TYPE_ID)
|
||||
.put(fieldNameBytes)
|
||||
.putInt(resolution)
|
||||
.putInt(numBuckets)
|
||||
.putFloat(lowerLimit)
|
||||
.putFloat(upperLimit)
|
||||
.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "ApproximateHistogramFoldingAggregatorFactory{" +
|
||||
"name='" + name + '\'' +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
", resolution=" + resolution +
|
||||
", numBuckets=" + numBuckets +
|
||||
", lowerLimit=" + lowerLimit +
|
||||
", upperLimit=" + upperLimit +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.segment.ObjectColumnSelector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramFoldingBufferAggregator implements BufferAggregator
|
||||
{
|
||||
private final ObjectColumnSelector<ApproximateHistogram> selector;
|
||||
private final int resolution;
|
||||
private final float upperLimit;
|
||||
private final float lowerLimit;
|
||||
|
||||
private float[] tmpBufferP;
|
||||
private long[] tmpBufferB;
|
||||
|
||||
public ApproximateHistogramFoldingBufferAggregator(
|
||||
ObjectColumnSelector<ApproximateHistogram> selector,
|
||||
int resolution,
|
||||
float lowerLimit,
|
||||
float upperLimit
|
||||
)
|
||||
{
|
||||
this.selector = selector;
|
||||
this.resolution = resolution;
|
||||
this.lowerLimit = lowerLimit;
|
||||
this.upperLimit = upperLimit;
|
||||
|
||||
tmpBufferP = new float[resolution];
|
||||
tmpBufferB = new long[resolution];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(ByteBuffer buf, int position)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(resolution, lowerLimit, upperLimit);
|
||||
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
// use dense storage for aggregation
|
||||
h.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.duplicate();
|
||||
mutationBuffer.position(position);
|
||||
|
||||
ApproximateHistogram h0 = ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
h0.setLowerLimit(lowerLimit);
|
||||
h0.setUpperLimit(upperLimit);
|
||||
ApproximateHistogram hNext = selector.get();
|
||||
h0.foldFast(hNext, tmpBufferP, tmpBufferB);
|
||||
|
||||
mutationBuffer.position(position);
|
||||
h0.toBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object get(ByteBuffer buf, int position)
|
||||
{
|
||||
ByteBuffer mutationBuffer = buf.asReadOnlyBuffer();
|
||||
mutationBuffer.position(position);
|
||||
return ApproximateHistogram.fromBytesDense(mutationBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat(ByteBuffer buf, int position)
|
||||
{
|
||||
throw new UnsupportedOperationException("ApproximateHistogramFoldingBufferAggregator does not support getFloat()");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
// no resources to cleanup
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.collect.Ordering;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.segment.column.ColumnBuilder;
|
||||
import io.druid.segment.data.GenericIndexed;
|
||||
import io.druid.segment.data.ObjectStrategy;
|
||||
import io.druid.segment.serde.ColumnPartSerde;
|
||||
import io.druid.segment.serde.ComplexColumnPartSerde;
|
||||
import io.druid.segment.serde.ComplexColumnPartSupplier;
|
||||
import io.druid.segment.serde.ComplexMetricExtractor;
|
||||
import io.druid.segment.serde.ComplexMetricSerde;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
public class ApproximateHistogramFoldingSerde extends ComplexMetricSerde
|
||||
{
|
||||
private static Ordering<ApproximateHistogram> comparator = new Ordering<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public int compare(
|
||||
ApproximateHistogram arg1, ApproximateHistogram arg2
|
||||
)
|
||||
{
|
||||
return ApproximateHistogramAggregator.COMPARATOR.compare(arg1, arg2);
|
||||
}
|
||||
}.nullsFirst();
|
||||
|
||||
@Override
|
||||
public String getTypeName()
|
||||
{
|
||||
return "approximateHistogram";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ComplexMetricExtractor getExtractor()
|
||||
{
|
||||
return new ComplexMetricExtractor()
|
||||
{
|
||||
@Override
|
||||
public Class<ApproximateHistogram> extractedClass()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram extractValue(InputRow inputRow, String metricName)
|
||||
{
|
||||
List<String> dimValues = inputRow.getDimension(metricName);
|
||||
if (dimValues != null && dimValues.size() > 0) {
|
||||
Iterator<String> values = dimValues.iterator();
|
||||
|
||||
ApproximateHistogram h = new ApproximateHistogram();
|
||||
|
||||
while (values.hasNext()) {
|
||||
float value = Float.parseFloat(values.next());
|
||||
h.offer(value);
|
||||
}
|
||||
return h;
|
||||
} else {
|
||||
return new ApproximateHistogram(0);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnPartSerde deserializeColumn(
|
||||
ByteBuffer byteBuffer, ColumnBuilder columnBuilder
|
||||
)
|
||||
{
|
||||
final GenericIndexed column = GenericIndexed.read(byteBuffer, getObjectStrategy());
|
||||
|
||||
columnBuilder.setComplexColumn(new ComplexColumnPartSupplier(getTypeName(), column));
|
||||
|
||||
return new ComplexColumnPartSerde(column, getTypeName());
|
||||
}
|
||||
|
||||
public ObjectStrategy getObjectStrategy()
|
||||
{
|
||||
return new ObjectStrategy<ApproximateHistogram>()
|
||||
{
|
||||
@Override
|
||||
public Class<? extends ApproximateHistogram> getClazz()
|
||||
{
|
||||
return ApproximateHistogram.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApproximateHistogram fromByteBuffer(ByteBuffer buffer, int numBytes)
|
||||
{
|
||||
final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer();
|
||||
readOnlyBuffer.limit(readOnlyBuffer.position() + numBytes);
|
||||
return ApproximateHistogram.fromBytes(readOnlyBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] toBytes(ApproximateHistogram h)
|
||||
{
|
||||
if (h == null) {
|
||||
return new byte[]{};
|
||||
}
|
||||
return h.toBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(ApproximateHistogram o1, ApproximateHistogram o2)
|
||||
{
|
||||
return comparator.compare(o1, o2);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
|
||||
public abstract class ApproximateHistogramPostAggregator implements PostAggregator
|
||||
{
|
||||
private static final Comparator COMPARATOR = ApproximateHistogramAggregator.COMPARATOR;
|
||||
|
||||
private final String name;
|
||||
private final String fieldName;
|
||||
|
||||
public ApproximateHistogramPostAggregator(
|
||||
String name,
|
||||
String fieldName
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract Object compute(Map<String, Object> values);
|
||||
|
||||
@Override
|
||||
@JsonProperty
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getFieldName()
|
||||
{
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract String toString();
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
/**
 * Hash codes over a sub-range {@code [fromIndex, toIndex)} of a primitive array, computed with
 * the usual "start at 1, multiply by 31, add the element hash" recurrence.
 */
public class ArrayUtils
{
  /**
   * @param a         array to hash
   * @param fromIndex first index included
   * @param toIndex   first index excluded; an empty range hashes to 1
   * @return hash of the selected longs
   */
  public static int hashCode(long[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      result = 31 * result + foldBits(a[idx]);
    }
    return result;
  }

  /**
   * @param a         array to hash
   * @param fromIndex first index included
   * @param toIndex   first index excluded; an empty range hashes to 1
   * @return hash of the selected floats, each contributing {@link Float#floatToIntBits(float)}
   */
  public static int hashCode(float[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      result = 31 * result + Float.floatToIntBits(a[idx]);
    }
    return result;
  }

  /**
   * @param a         array to hash
   * @param fromIndex first index included
   * @param toIndex   first index excluded; an empty range hashes to 1
   * @return hash of the selected doubles, each contributing its folded
   *         {@link Double#doubleToLongBits(double)}
   */
  public static int hashCode(double[] a, int fromIndex, int toIndex)
  {
    int result = 1;
    for (int idx = fromIndex; idx < toIndex; idx++) {
      result = 31 * result + foldBits(Double.doubleToLongBits(a[idx]));
    }
    return result;
  }

  // folds a 64-bit pattern into 32 bits by xor-ing its high and low halves
  private static int foldBits(long bits)
  {
    return (int) (bits ^ (bits >>> 32));
  }
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("buckets")
|
||||
public class BucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float bucketSize;
|
||||
private final float offset;
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public BucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("bucketSize") float bucketSize,
|
||||
@JsonProperty("offset") float offset
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.bucketSize = bucketSize;
|
||||
if (this.bucketSize <= 0) {
|
||||
throw new IAE("Illegal bucketSize [%s], must be > 0", this.bucketSize);
|
||||
}
|
||||
this.offset = offset;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(bucketSize, offset);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getBucketSize()
|
||||
{
|
||||
return bucketSize;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getOffset()
|
||||
{
|
||||
return bucketSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "BucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", bucketSize=" + this.getBucketSize() +
|
||||
", offset=" + this.getOffset() +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import java.nio.DoubleBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
|
||||
/**
 * Binary search over an index range of a sorted NIO buffer, using absolute reads so the
 * buffer's position is not moved.
 */
public class BufferUtils
{
  /**
   * Searches {@code buf} for {@code value} within {@code [minIndex, maxIndex)}.
   *
   * @param buf      buffer whose searched range is sorted ascending per {@link Double#compare}
   * @param minIndex first index searched
   * @param maxIndex first index not searched
   * @param value    value to find
   * @return the index of a matching element, or {@code -(insertionPoint + 1)} when absent
   */
  public static int binarySearch(DoubleBuffer buf, int minIndex, int maxIndex, double value)
  {
    int lo = minIndex;
    int hi = maxIndex;

    while (lo < hi) {
      // hi is exclusive, so probe the middle of [lo, hi - 1]
      final int mid = (lo + hi - 1) >>> 1;
      final int cmp = Double.compare(buf.get(mid), value);

      if (cmp < 0) {
        lo = mid + 1;
      } else if (cmp > 0) {
        hi = mid;
      } else {
        return mid;
      }
    }

    return -(lo + 1);
  }

  /**
   * Searches {@code buf} for {@code value} within {@code [minIndex, maxIndex)}.
   *
   * @param buf      buffer whose searched range is sorted ascending per {@link Float#compare}
   * @param minIndex first index searched
   * @param maxIndex first index not searched
   * @param value    value to find
   * @return the index of a matching element, or {@code -(insertionPoint + 1)} when absent
   */
  public static int binarySearch(FloatBuffer buf, int minIndex, int maxIndex, float value)
  {
    int lo = minIndex;
    int hi = maxIndex;

    while (lo < hi) {
      // hi is exclusive, so probe the middle of [lo, hi - 1]
      final int mid = (lo + hi - 1) >>> 1;
      final int cmp = Float.compare(buf.get(mid), value);

      if (cmp < 0) {
        lo = mid + 1;
      } else if (cmp > 0) {
        hi = mid;
      } else {
        return mid;
      }
    }

    return -(lo + 1);
  }
}
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("customBuckets")
|
||||
public class CustomBucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float[] breaks;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public CustomBucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("breaks") float[] breaks
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.breaks = breaks;
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(breaks);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getBreaks()
|
||||
{
|
||||
return breaks;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "CustomBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", breaks=" + Arrays.toString(this.getBreaks()) +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("equalBuckets")
|
||||
public class EqualBucketsPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final int numBuckets;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public EqualBucketsPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("numBuckets") int numBuckets
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.numBuckets = numBuckets;
|
||||
if (this.numBuckets <= 1) {
|
||||
throw new IAE("Illegal number of buckets[%s], must be > 1", this.numBuckets);
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.toHistogram(numBuckets);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public int getNumBuckets()
|
||||
{
|
||||
return numBuckets;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "EqualBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", numBuckets=" + this.getNumBuckets() +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class Histogram
|
||||
{
|
||||
double[] breaks;
|
||||
double[] counts;
|
||||
|
||||
public Histogram(float[] breaks, double[] counts)
|
||||
{
|
||||
double[] retVal = new double[breaks.length];
|
||||
for (int i = 0; i < breaks.length; ++i) {
|
||||
retVal[i] = (double) breaks[i];
|
||||
}
|
||||
|
||||
this.breaks = retVal;
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double[] getBreaks()
|
||||
{
|
||||
return breaks;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double[] getCounts()
|
||||
{
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Histogram that = (Histogram) o;
|
||||
|
||||
if (!Arrays.equals(this.getBreaks(), that.getBreaks())) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(this.getCounts(), that.getCounts())) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
int result = (this.getBreaks() != null ? ArrayUtils.hashCode(this.getBreaks(), 0, this.getBreaks().length) : 0);
|
||||
result = 31 * result + (this.getCounts() != null ? ArrayUtils.hashCode(
|
||||
this.getCounts(),
|
||||
0,
|
||||
this.getCounts().length
|
||||
) : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("max")
|
||||
public class MaxPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public MaxPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getMax();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("min")
|
||||
public class MinPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public MinPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getMin();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("quantile")
|
||||
public class QuantilePostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
static final Comparator COMPARATOR = new Comparator()
|
||||
{
|
||||
@Override
|
||||
public int compare(Object o, Object o1)
|
||||
{
|
||||
return Double.compare(((Number) o).doubleValue(), ((Number) o1).doubleValue());
|
||||
}
|
||||
};
|
||||
|
||||
private final float probability;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public QuantilePostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("probability") float probability
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.probability = probability;
|
||||
this.fieldName = fieldName;
|
||||
|
||||
if (probability < 0 | probability > 1) {
|
||||
throw new IAE("Illegal probability[%s], must be strictly between 0 and 1", probability);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
return COMPARATOR;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
return ah.getQuantiles(new float[]{this.getProbability()})[0];
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getProbability()
|
||||
{
|
||||
return probability;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "QuantilePostAggregator{" +
|
||||
"probability=" + probability +
|
||||
", fieldName='" + fieldName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
@JsonTypeName("quantiles")
|
||||
public class Quantiles
|
||||
{
|
||||
float[] probabilities;
|
||||
float[] quantiles;
|
||||
float min;
|
||||
float max;
|
||||
|
||||
@JsonCreator
|
||||
public Quantiles(
|
||||
@JsonProperty("probabilities") float[] probabilities,
|
||||
@JsonProperty("quantiles") float[] quantiles,
|
||||
@JsonProperty("min") float min,
|
||||
@JsonProperty("max") float max
|
||||
)
|
||||
{
|
||||
this.probabilities = probabilities;
|
||||
this.quantiles = quantiles;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getProbabilities()
|
||||
{
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getQuantiles()
|
||||
{
|
||||
return quantiles;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getMin()
|
||||
{
|
||||
return min;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float getMax()
|
||||
{
|
||||
return max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Quantiles quantiles1 = (Quantiles) o;
|
||||
|
||||
if (Float.compare(quantiles1.max, max) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (Float.compare(quantiles1.min, min) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(probabilities, quantiles1.probabilities)) {
|
||||
return false;
|
||||
}
|
||||
if (!Arrays.equals(quantiles, quantiles1.quantiles)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
int result = probabilities != null ? Arrays.hashCode(probabilities) : 0;
|
||||
result = 31 * result + (quantiles != null ? Arrays.hashCode(quantiles) : 0);
|
||||
result = 31 * result + (min != +0.0f ? Float.floatToIntBits(min) : 0);
|
||||
result = 31 * result + (max != +0.0f ? Float.floatToIntBits(max) : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.metamx.common.IAE;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@JsonTypeName("quantiles")
|
||||
public class QuantilesPostAggregator extends ApproximateHistogramPostAggregator
|
||||
{
|
||||
private final float[] probabilities;
|
||||
private String fieldName;
|
||||
|
||||
@JsonCreator
|
||||
public QuantilesPostAggregator(
|
||||
@JsonProperty("name") String name,
|
||||
@JsonProperty("fieldName") String fieldName,
|
||||
@JsonProperty("probabilities") float[] probabilities
|
||||
)
|
||||
{
|
||||
super(name, fieldName);
|
||||
this.probabilities = probabilities;
|
||||
this.fieldName = fieldName;
|
||||
|
||||
for (float p : probabilities) {
|
||||
if (p < 0 | p > 1) {
|
||||
throw new IAE("Illegal probability[%s], must be strictly between 0 and 1", p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDependentFields()
|
||||
{
|
||||
return Sets.newHashSet(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object compute(Map<String, Object> values)
|
||||
{
|
||||
final ApproximateHistogram ah = (ApproximateHistogram) values.get(this.getFieldName());
|
||||
|
||||
return new Quantiles(this.getProbabilities(), ah.getQuantiles(this.getProbabilities()), ah.getMin(), ah.getMax());
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public float[] getProbabilities()
|
||||
{
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "EqualBucketsPostAggregator{" +
|
||||
"name='" + this.getName() + '\'' +
|
||||
", fieldName='" + this.getFieldName() + '\'' +
|
||||
", probabilities=" + Arrays.toString(this.getProbabilities()) +
|
||||
'}';
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
io.druid.query.aggregation.histogram.ApproximateHistogramDruidModule
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.TestFloatColumnSelector;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class ApproximateHistogramAggregatorTest
|
||||
{
|
||||
private void aggregateBuffer(TestFloatColumnSelector selector, BufferAggregator agg, ByteBuffer buf, int position)
|
||||
{
|
||||
agg.aggregate(buf, position);
|
||||
selector.increment();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBufferAggregate() throws Exception
|
||||
{
|
||||
final float[] values = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45};
|
||||
final int resolution = 5;
|
||||
final int numBuckets = 5;
|
||||
|
||||
final TestFloatColumnSelector selector = new TestFloatColumnSelector(values);
|
||||
|
||||
ApproximateHistogramAggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
|
||||
"billy", "billy", resolution, numBuckets, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY
|
||||
);
|
||||
ApproximateHistogramBufferAggregator agg = new ApproximateHistogramBufferAggregator(selector, resolution, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
|
||||
|
||||
ByteBuffer buf = ByteBuffer.allocate(factory.getMaxIntermediateSize());
|
||||
int position = 0;
|
||||
|
||||
agg.init(buf, position);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
aggregateBuffer(selector, agg, buf, position);
|
||||
}
|
||||
|
||||
ApproximateHistogram h = ((ApproximateHistogram) agg.get(buf, position));
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions don't match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions, 0.01f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts don't match expected counts",
|
||||
new long[]{1, 2, 3, 3, 1}, h.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("getMin value doesn't match expected getMin", 2, h.min(), 0);
|
||||
Assert.assertEquals("getMax value doesn't match expected getMax", 45, h.max(), 0);
|
||||
|
||||
Assert.assertEquals("bin count doesn't match expected bin count", 5, h.binCount());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.primitives.Floats;
|
||||
import io.druid.query.aggregation.Histogram;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class ApproximateHistogramErrorBenchmark
|
||||
{
|
||||
private boolean debug = true;
|
||||
private int numBuckets = 20;
|
||||
private int numBreaks = numBuckets + 1;
|
||||
private int numPerHist = 50;
|
||||
private int numHists = 10;
|
||||
private int resolution = 50;
|
||||
private int combinedResolution = 100;
|
||||
private Random rand = new Random(2);
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setDebug(boolean debug)
|
||||
{
|
||||
this.debug = debug;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumBuckets(int numBuckets)
|
||||
{
|
||||
this.numBuckets = numBuckets;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumBreaks(int numBreaks)
|
||||
{
|
||||
this.numBreaks = numBreaks;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumPerHist(int numPerHist)
|
||||
{
|
||||
this.numPerHist = numPerHist;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setNumHists(int numHists)
|
||||
{
|
||||
this.numHists = numHists;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setResolution(int resolution)
|
||||
{
|
||||
this.resolution = resolution;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ApproximateHistogramErrorBenchmark setCombinedResolution(int combinedResolution)
|
||||
{
|
||||
this.combinedResolution = combinedResolution;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
ApproximateHistogramErrorBenchmark approxHist = new ApproximateHistogramErrorBenchmark();
|
||||
System.out.println(
|
||||
Arrays.toString(
|
||||
approxHist.setDebug(true)
|
||||
.setNumPerHist(50)
|
||||
.setNumHists(10000)
|
||||
.setResolution(50)
|
||||
.setCombinedResolution(100)
|
||||
.getErrors()
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
ApproximateHistogramErrorBenchmark approxHist2 = new ApproximateHistogramErrorBenchmark();
|
||||
int[] numHistsArray = new int[]{10, 100, 1000, 10000, 100000};
|
||||
float[] errs1 = new float[numHistsArray.length];
|
||||
float[] errs2 = new float[numHistsArray.length];
|
||||
for (int i = 0; i < numHistsArray.length; ++i) {
|
||||
float[] tmp = approxHist2.setDebug(false).setNumHists(numHistsArray[i]).setCombinedResolution(100).getErrors();
|
||||
errs1[i] = tmp[0];
|
||||
errs2[i] = tmp[1];
|
||||
}
|
||||
|
||||
System.out
|
||||
.format("Number of histograms for folding : %s \n", Arrays.toString(numHistsArray));
|
||||
System.out.format("Errors for approximate histogram : %s \n", Arrays.toString(errs1));
|
||||
System.out.format("Errors for approximate histogram, ruleFold : %s \n", Arrays.toString(errs2));
|
||||
}
|
||||
|
||||
private float[] getErrors()
|
||||
{
|
||||
final int numValues = numHists * numPerHist;
|
||||
final float[] values = new float[numValues];
|
||||
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
values[i] = (float) rand.nextGaussian();
|
||||
}
|
||||
|
||||
float min = Floats.min(values);
|
||||
min = (float) (min < 0 ? 1.02 : .98) * min;
|
||||
float max = Floats.max(values);
|
||||
max = (float) (max < 0 ? .98 : 1.02) * max;
|
||||
final float stride = (max - min) / numBuckets;
|
||||
final float[] breaks = new float[numBreaks];
|
||||
for (int i = 0; i < numBreaks; i++) {
|
||||
breaks[i] = min + stride * i;
|
||||
}
|
||||
|
||||
Histogram h = new Histogram(breaks);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
double[] hcounts = h.asVisual().counts;
|
||||
|
||||
ApproximateHistogram ah1 = new ApproximateHistogram(resolution);
|
||||
ApproximateHistogram ah2 = new ApproximateHistogram(combinedResolution);
|
||||
ApproximateHistogram tmp = new ApproximateHistogram(resolution);
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
tmp.offer(values[i]);
|
||||
if ((i + 1) % numPerHist == 0) {
|
||||
ah1.fold(tmp);
|
||||
ah2.foldRule(tmp, null, null);
|
||||
tmp = new ApproximateHistogram(resolution);
|
||||
}
|
||||
}
|
||||
double[] ahcounts1 = ah1.toHistogram(breaks).getCounts();
|
||||
double[] ahcounts2 = ah2.toHistogram(breaks).getCounts();
|
||||
|
||||
float err1 = 0;
|
||||
float err2 = 0;
|
||||
for (int j = 0; j < hcounts.length; j++) {
|
||||
err1 += Math.abs((hcounts[j] - ahcounts1[j]) / numValues);
|
||||
err2 += Math.abs((hcounts[j] - ahcounts2[j]) / numValues);
|
||||
}
|
||||
|
||||
if (debug) {
|
||||
float sum = 0;
|
||||
for (double v : hcounts) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Exact Histogram Sum:");
|
||||
System.out.println(sum);
|
||||
sum = 0;
|
||||
for (double v : ahcounts1) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Approximate Histogram Sum:");
|
||||
System.out.println(sum);
|
||||
sum = 0;
|
||||
for (double v : ahcounts2) {
|
||||
sum += v;
|
||||
}
|
||||
System.out.println("Approximate Histogram Rule Fold Sum:");
|
||||
System.out.println(sum);
|
||||
System.out.println("Exact Histogram:");
|
||||
System.out.println(h.asVisual());
|
||||
System.out.println("Approximate Histogram:");
|
||||
System.out.println(ah1.toHistogram(breaks));
|
||||
System.out.println("Approximate Histogram Rule Fold:");
|
||||
System.out.println(ah2.toHistogram(breaks));
|
||||
System.out.format("Error for approximate histogram: %s \n", err1);
|
||||
System.out.format("Error for approximate histogram, ruleFold: %s \n", err2);
|
||||
System.out.format("Error ratio for AHRF: %s \n", err2 / err1);
|
||||
}
|
||||
return new float[]{err1, err2, err2 / err1};
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import io.druid.query.aggregation.TestFloatColumnSelector;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class ApproximateHistogramPostAggregatorTest
|
||||
{
|
||||
static final float[] VALUES = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompute()
|
||||
{
|
||||
ApproximateHistogram ah = buildHistogram(10, VALUES);
|
||||
final TestFloatColumnSelector selector = new TestFloatColumnSelector(VALUES);
|
||||
|
||||
ApproximateHistogramAggregator agg = new ApproximateHistogramAggregator("price", selector, 10, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
|
||||
for (int i = 0; i < VALUES.length; i++) {
|
||||
agg.aggregate();
|
||||
selector.increment();
|
||||
}
|
||||
|
||||
Map<String, Object> metricValues = new HashMap<String, Object>();
|
||||
metricValues.put(agg.getName(), agg.get());
|
||||
|
||||
ApproximateHistogramPostAggregator approximateHistogramPostAggregator = new EqualBucketsPostAggregator(
|
||||
"approxHist",
|
||||
"price",
|
||||
5
|
||||
);
|
||||
Assert.assertEquals(ah.toHistogram(5), approximateHistogramPostAggregator.compute(metricValues));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.druid.collections.StupidPool;
|
||||
import io.druid.query.QueryRunner;
|
||||
import io.druid.query.QueryRunnerTestHelper;
|
||||
import io.druid.query.Result;
|
||||
import io.druid.query.TestQueryRunners;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.MaxAggregatorFactory;
|
||||
import io.druid.query.aggregation.MinAggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
import io.druid.query.topn.TopNQuery;
|
||||
import io.druid.query.topn.TopNQueryBuilder;
|
||||
import io.druid.query.topn.TopNQueryConfig;
|
||||
import io.druid.query.topn.TopNQueryQueryToolChest;
|
||||
import io.druid.query.topn.TopNQueryRunnerFactory;
|
||||
import io.druid.query.topn.TopNResultValue;
|
||||
import io.druid.segment.TestHelper;
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class ApproximateHistogramQueryTest
|
||||
{
|
||||
|
||||
private final QueryRunner runner;
|
||||
|
||||
public ApproximateHistogramQueryTest(
|
||||
QueryRunner runner
|
||||
)
|
||||
{
|
||||
this.runner = runner;
|
||||
}
|
||||
|
||||
@Parameterized.Parameters
|
||||
public static Collection<?> constructorFeeder() throws IOException
|
||||
{
|
||||
List<Object> retVal = Lists.newArrayList();
|
||||
retVal.addAll(
|
||||
QueryRunnerTestHelper.makeQueryRunners(
|
||||
new TopNQueryRunnerFactory(
|
||||
TestQueryRunners.getPool(),
|
||||
new TopNQueryQueryToolChest(new TopNQueryConfig()),
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER
|
||||
)
|
||||
)
|
||||
);
|
||||
retVal.addAll(
|
||||
QueryRunnerTestHelper.makeQueryRunners(
|
||||
new TopNQueryRunnerFactory(
|
||||
new StupidPool<ByteBuffer>(
|
||||
new Supplier<ByteBuffer>()
|
||||
{
|
||||
@Override
|
||||
public ByteBuffer get()
|
||||
{
|
||||
return ByteBuffer.allocate(2000);
|
||||
}
|
||||
}
|
||||
),
|
||||
new TopNQueryQueryToolChest(new TopNQueryConfig()),
|
||||
QueryRunnerTestHelper.NOOP_QUERYWATCHER
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTopNWithApproximateHistogramAgg()
|
||||
{
|
||||
ApproximateHistogramAggregatorFactory factory = new ApproximateHistogramAggregatorFactory(
|
||||
"apphisto",
|
||||
"index",
|
||||
10,
|
||||
5,
|
||||
Float.NEGATIVE_INFINITY,
|
||||
Float.POSITIVE_INFINITY
|
||||
);
|
||||
|
||||
TopNQuery query = new TopNQueryBuilder()
|
||||
.dataSource(QueryRunnerTestHelper.dataSource)
|
||||
.granularity(QueryRunnerTestHelper.allGran)
|
||||
.dimension(QueryRunnerTestHelper.providerDimension)
|
||||
.metric(QueryRunnerTestHelper.dependentPostAggMetric)
|
||||
.threshold(4)
|
||||
.intervals(QueryRunnerTestHelper.fullOnInterval)
|
||||
.aggregators(
|
||||
Lists.<AggregatorFactory>newArrayList(
|
||||
Iterables.concat(
|
||||
QueryRunnerTestHelper.commonAggregators,
|
||||
Lists.newArrayList(
|
||||
new MaxAggregatorFactory("maxIndex", "index"),
|
||||
new MinAggregatorFactory("minIndex", "index"),
|
||||
factory
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
.postAggregators(
|
||||
Arrays.<PostAggregator>asList(
|
||||
QueryRunnerTestHelper.addRowsIndexConstant,
|
||||
QueryRunnerTestHelper.dependentPostAgg,
|
||||
new QuantilePostAggregator("quantile", "apphisto", 0.5f)
|
||||
)
|
||||
)
|
||||
.build();
|
||||
|
||||
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
|
||||
new Result<TopNResultValue>(
|
||||
new DateTime("2011-01-12T00:00:00.000Z"),
|
||||
new TopNResultValue(
|
||||
Arrays.<Map<String, Object>>asList(
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "total_market")
|
||||
.put("rows", 186L)
|
||||
.put("index", 215679.82879638672D)
|
||||
.put("addRowsIndexConstant", 215866.82879638672D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 216053.82879638672D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
|
||||
.put("maxIndex", 1743.9217529296875D)
|
||||
.put("minIndex", 792.3260498046875D)
|
||||
.put("quantile", 1085.6775f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
554.4271240234375f,
|
||||
792.3260498046875f,
|
||||
1030.2249755859375f,
|
||||
1268.1239013671875f,
|
||||
1506.0228271484375f,
|
||||
1743.9217529296875f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
39.42073059082031D,
|
||||
103.29110717773438D,
|
||||
34.93659591674805D,
|
||||
8.351564407348633D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build(),
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "upfront")
|
||||
.put("rows", 186L)
|
||||
.put("index", 192046.1060180664D)
|
||||
.put("addRowsIndexConstant", 192233.1060180664D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 192420.1060180664D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_2)
|
||||
.put("maxIndex", 1870.06103515625D)
|
||||
.put("minIndex", 545.9906005859375D)
|
||||
.put("quantile", 880.9881f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
214.97299194335938f,
|
||||
545.9906005859375f,
|
||||
877.0081787109375f,
|
||||
1208.0257568359375f,
|
||||
1539.0433349609375f,
|
||||
1870.06103515625f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
67.53287506103516D,
|
||||
72.22068786621094D,
|
||||
31.984678268432617D,
|
||||
14.261756896972656D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build(),
|
||||
ImmutableMap.<String, Object>builder()
|
||||
.put(QueryRunnerTestHelper.providerDimension, "spot")
|
||||
.put("rows", 837L)
|
||||
.put("index", 95606.57232284546D)
|
||||
.put("addRowsIndexConstant", 96444.57232284546D)
|
||||
.put(QueryRunnerTestHelper.dependentPostAggMetric, 97282.57232284546D)
|
||||
.put("uniques", QueryRunnerTestHelper.UNIQUES_9)
|
||||
.put("maxIndex", 277.2735290527344D)
|
||||
.put("minIndex", 59.02102279663086D)
|
||||
.put("quantile", 101.78856f)
|
||||
.put(
|
||||
"apphisto",
|
||||
new Histogram(
|
||||
new float[]{
|
||||
4.457897186279297f,
|
||||
59.02102279663086f,
|
||||
113.58415222167969f,
|
||||
168.14727783203125f,
|
||||
222.7104034423828f,
|
||||
277.2735290527344f
|
||||
},
|
||||
new double[]{
|
||||
0.0D,
|
||||
462.4309997558594D,
|
||||
357.5404968261719D,
|
||||
15.022850036621094D,
|
||||
2.0056631565093994D
|
||||
}
|
||||
)
|
||||
)
|
||||
.build()
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
TestHelper.assertExpectedResults(expectedResults, runner.run(query));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,588 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
public class ApproximateHistogramTest
|
||||
{
|
||||
static final float[] VALUES = {23, 19, 10, 16, 36, 2, 9, 32, 30, 45};
|
||||
static final float[] VALUES2 = {23, 19, 10, 16, 36, 2, 1, 9, 32, 30, 45, 46};
|
||||
|
||||
static final float[] VALUES3 = {
|
||||
20, 16, 19, 27, 17, 20, 18, 20, 28, 14, 17, 21, 20, 21, 10, 25, 23, 17, 21, 18,
|
||||
14, 20, 18, 12, 19, 20, 23, 25, 15, 22, 14, 17, 15, 23, 23, 15, 27, 20, 17, 15
|
||||
};
|
||||
static final float[] VALUES4 = {
|
||||
27.489f, 3.085f, 3.722f, 66.875f, 30.998f, -8.193f, 5.395f, 5.109f, 10.944f, 54.75f,
|
||||
14.092f, 15.604f, 52.856f, 66.034f, 22.004f, -14.682f, -50.985f, 2.872f, 61.013f,
|
||||
-21.766f, 19.172f, 62.882f, 33.537f, 21.081f, 67.115f, 44.789f, 64.1f, 20.911f,
|
||||
-6.553f, 2.178f
|
||||
};
|
||||
static final float[] VALUES5 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
static final float[] VALUES6 = {1f, 1.5f, 2f, 2.5f, 3f, 3.5f, 4f, 4.5f, 5f, 5.5f, 6f, 6.5f, 7f, 7.5f, 8f, 8.5f, 9f, 9.5f, 10f};
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
protected ApproximateHistogram buildHistogram(int size, float[] values, float lowerLimit, float upperLimit)
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(size, lowerLimit, upperLimit);
|
||||
for (float v : values) {
|
||||
h.offer(v);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOffer() throws Exception
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
|
||||
// (2, 1), (9.5, 2), (19.33, 3), (32.67, 3), (45, 1)
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, h.positions(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new long[]{1, 2, 3, 3, 1}, h.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("min value matches expexted min", 2, h.min(), 0);
|
||||
Assert.assertEquals("max value matches expexted max", 45, h.max(), 0);
|
||||
|
||||
Assert.assertEquals("bin count matches expected bin count", 5, h.binCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFold()
|
||||
{
|
||||
ApproximateHistogram merged = new ApproximateHistogram(0);
|
||||
ApproximateHistogram mergedFast = new ApproximateHistogram(0);
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(5);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
h1.offer(VALUES[i]);
|
||||
}
|
||||
for (int i = 5; i < VALUES.length; ++i) {
|
||||
h2.offer(VALUES[i]);
|
||||
}
|
||||
|
||||
merged.fold(h1);
|
||||
merged.fold(h2);
|
||||
mergedFast.foldFast(h1);
|
||||
mergedFast.foldFast(h2);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{2, 9.5f, 19.33f, 32.67f, 45f}, merged.positions(), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{11.2f, 30.25f, 45f}, mergedFast.positions(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{1, 2, 3, 3, 1}, merged.bins()
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{5, 4, 1}, mergedFast.bins()
|
||||
);
|
||||
|
||||
Assert.assertEquals("merged max matches expected value", 45f, merged.max(), 0.1f);
|
||||
Assert.assertEquals("mergedfast max matches expected value", 45f, mergedFast.max(), 0.1f);
|
||||
Assert.assertEquals("merged min matches expected value", 2f, merged.min(), 0.1f);
|
||||
Assert.assertEquals("mergedfast min matches expected value", 2f, mergedFast.min(), 0.1f);
|
||||
|
||||
// fold where merged bincount is less than total bincount
|
||||
ApproximateHistogram a = buildHistogram(10, new float[]{1, 2, 3, 4, 5, 6});
|
||||
ApproximateHistogram aFast = buildHistogram(10, new float[]{1, 2, 3, 4, 5, 6});
|
||||
ApproximateHistogram b = buildHistogram(5, new float[]{3, 4, 5, 6});
|
||||
|
||||
a.fold(b);
|
||||
aFast.foldFast(b);
|
||||
|
||||
Assert.assertEquals(
|
||||
new ApproximateHistogram(
|
||||
6,
|
||||
new float[]{1, 2, 3, 4, 5, 6, 0, 0, 0, 0},
|
||||
new long[]{1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
|
||||
1, 6
|
||||
), a
|
||||
);
|
||||
Assert.assertEquals(
|
||||
new ApproximateHistogram(
|
||||
6,
|
||||
new float[]{1, 2, 3, 4, 5, 6, 0, 0, 0, 0},
|
||||
new long[]{1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
|
||||
1, 6
|
||||
), aFast
|
||||
);
|
||||
|
||||
ApproximateHistogram h3 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4 = new ApproximateHistogram(10);
|
||||
for (float v : VALUES3) {
|
||||
h3.offer(v);
|
||||
}
|
||||
for (float v : VALUES4) {
|
||||
h4.offer(v);
|
||||
}
|
||||
h3.fold(h4);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin positions match expected positions",
|
||||
new float[]{-50.98f, -21.77f, -9.81f, 3.73f, 13.72f, 20.1f, 29f, 44.79f, 53.8f, 64.67f},
|
||||
h3.positions(), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"final bin counts match expected counts",
|
||||
new long[]{1, 1, 3, 6, 12, 32, 6, 1, 2, 6}, h3.bins()
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFoldNothing() throws Exception
|
||||
{
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
|
||||
h1.fold(h2);
|
||||
h1.foldFast(h2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFoldNothing2() throws Exception
|
||||
{
|
||||
ApproximateHistogram h1 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h1Fast = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h3 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4 = new ApproximateHistogram(10);
|
||||
ApproximateHistogram h4Fast = new ApproximateHistogram(10);
|
||||
for (float v : VALUES3) {
|
||||
h3.offer(v);
|
||||
h4.offer(v);
|
||||
h4Fast.offer(v);
|
||||
}
|
||||
|
||||
h1.fold(h3);
|
||||
h4.fold(h2);
|
||||
h1Fast.foldFast(h3);
|
||||
h4Fast.foldFast(h2);
|
||||
|
||||
Assert.assertEquals(h3, h1);
|
||||
Assert.assertEquals(h4, h3);
|
||||
Assert.assertEquals(h3, h1Fast);
|
||||
Assert.assertEquals(h3, h4Fast);
|
||||
}
|
||||
|
||||
//@Test
|
||||
public void testFoldSpeed()
|
||||
{
|
||||
final int combinedHistSize = 200;
|
||||
final int histSize = 50;
|
||||
final int numRand = 10000;
|
||||
ApproximateHistogram h = new ApproximateHistogram(combinedHistSize);
|
||||
Random rand = new Random(0);
|
||||
//for(int i = 0; i < 200; ++i) h.offer((float)(rand.nextGaussian() * 50.0));
|
||||
long tFold = 0;
|
||||
int count = 5000000;
|
||||
Float[] randNums = new Float[numRand];
|
||||
for (int i = 0; i < numRand; i++) {
|
||||
randNums[i] = (float) rand.nextGaussian();
|
||||
}
|
||||
|
||||
List<ApproximateHistogram> randHist = Lists.newLinkedList();
|
||||
Iterator<ApproximateHistogram> it = Iterators.cycle(randHist);
|
||||
|
||||
for(int k = 0; k < numRand; ++k) {
|
||||
ApproximateHistogram tmp = new ApproximateHistogram(histSize);
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
tmp.offer((float) (rand.nextGaussian() + (double)k));
|
||||
}
|
||||
randHist.add(tmp);
|
||||
}
|
||||
|
||||
float[] mergeBufferP = new float[combinedHistSize * 2];
|
||||
long[] mergeBufferB = new long[combinedHistSize * 2];
|
||||
float[] mergeBufferD = new float[combinedHistSize * 2];
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
ApproximateHistogram tmp = it.next();
|
||||
|
||||
long t0 = System.nanoTime();
|
||||
//h.fold(tmp, mergeBufferP, mergeBufferB, mergeBufferD);
|
||||
h.foldFast(tmp, mergeBufferP, mergeBufferB);
|
||||
tFold += System.nanoTime() - t0;
|
||||
}
|
||||
|
||||
System.out.println(String.format("Average folds per second : %f", (double) count / (double) tFold * 1e9));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSum()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
|
||||
Assert.assertEquals(0.0f, h.sum(0), 0.01);
|
||||
Assert.assertEquals(1.0f, h.sum(2), 0.01);
|
||||
Assert.assertEquals(1.16f, h.sum(5), 0.01);
|
||||
Assert.assertEquals(3.28f, h.sum(15), 0.01);
|
||||
Assert.assertEquals(VALUES.length, h.sum(45), 0.01);
|
||||
Assert.assertEquals(VALUES.length, h.sum(46), 0.01);
|
||||
|
||||
ApproximateHistogram h2 = buildHistogram(5, VALUES2);
|
||||
|
||||
Assert.assertEquals(0.0f, h2.sum(0), 0.01);
|
||||
Assert.assertEquals(0.0f, h2.sum(1f), 0.01);
|
||||
Assert.assertEquals(1.0f, h2.sum(1.5f), 0.01);
|
||||
Assert.assertEquals(1.125f, h2.sum(2f), 0.001);
|
||||
Assert.assertEquals(2.0625f, h2.sum(5.75f), 0.001);
|
||||
Assert.assertEquals(3.0f, h2.sum(9.5f), 0.01);
|
||||
Assert.assertEquals(11.0f, h2.sum(45.5f), 0.01);
|
||||
Assert.assertEquals(12.0f, h2.sum(46f), 0.01);
|
||||
Assert.assertEquals(12.0f, h2.sum(47f), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeCompact()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
ApproximateHistogram h2 = new ApproximateHistogram(50).fold(h);
|
||||
Assert.assertEquals(h2, ApproximateHistogram.fromBytes(h2.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeDense()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
ByteBuffer buf = ByteBuffer.allocate(h.getDenseStorageSize());
|
||||
h.toBytesDense(buf);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(buf.array()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeSparse()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES);
|
||||
ByteBuffer buf = ByteBuffer.allocate(h.getSparseStorageSize());
|
||||
h.toBytesSparse(buf);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(buf.array()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeCompactExact()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(50, new float[]{1f, 2f, 3f, 4f, 5f});
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
h = buildHistogram(5, new float[]{1f, 2f, 3f});
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
|
||||
h = new ApproximateHistogram(40).fold(h);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerializeEmpty()
|
||||
{
|
||||
ApproximateHistogram h = new ApproximateHistogram(50);
|
||||
Assert.assertEquals(h, ApproximateHistogram.fromBytes(h.toBytes()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileSmaller()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(20, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{3.33f, 6.67f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.5f, 5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4f, 6f, 8f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileEqualSize()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(10, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{3.33f, 6.67f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.5f, 5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4f, 6f, 8f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileBigger()
|
||||
{
|
||||
ApproximateHistogram h = buildHistogram(5, VALUES5);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{4.5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2.83f, 6.17f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{2f, 4.5f, 7f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1.5f, 3.5f, 5.5f, 7.5f},
|
||||
h.getQuantiles(new float[]{.2f, .4f, .6f, .8f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{1f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuantileBigger2()
|
||||
{
|
||||
float[] thousand = new float[1000];
|
||||
for (int i = 1; i <= 1000; ++i) {
|
||||
thousand[i - 1] = i;
|
||||
}
|
||||
ApproximateHistogram h = buildHistogram(100, thousand);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{493.5f},
|
||||
h.getQuantiles(new float[]{.5f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{327.5f, 662f},
|
||||
h.getQuantiles(new float[]{.333f, .666f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{244.5f, 493.5f, 746f},
|
||||
h.getQuantiles(new float[]{.25f, .5f, .75f}), 0.1f
|
||||
);
|
||||
Assert.assertArrayEquals(
|
||||
"expected quantiles match actual quantiles",
|
||||
new float[]{96.5f, 196.53f, 294.5f, 395.5f, 493.5f, 597f, 696f, 795f, 895.25f},
|
||||
h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, .9f}), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLimitSum()
|
||||
{
|
||||
final float lowerLimit = 0f;
|
||||
final float upperLimit = 10f;
|
||||
|
||||
ApproximateHistogram h = buildHistogram(15, VALUES6, lowerLimit, upperLimit);
|
||||
|
||||
for (int i = 1; i <= 20; ++i) {
|
||||
ApproximateHistogram hLow = new ApproximateHistogram(5);
|
||||
ApproximateHistogram hHigh = new ApproximateHistogram(5);
|
||||
hLow.offer(lowerLimit - i);
|
||||
hHigh.offer(upperLimit + i);
|
||||
h.foldFast(hLow);
|
||||
h.foldFast(hHigh);
|
||||
}
|
||||
|
||||
Assert.assertEquals(20f, h.sum(lowerLimit), .7f);
|
||||
Assert.assertEquals(VALUES6.length + 20f, h.sum(upperLimit), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets()
|
||||
{
|
||||
final float[] values = new float[]{-5f, .01f, .02f, .06f, .12f, 1f, 2f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(.05f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{1f, 2f, 1f, 1f, 0f, 1f, 1f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-5.05f, 0f, .05f, .1f, .15f, .95f, 1f, 2f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets2()
|
||||
{
|
||||
final float[] values = new float[]{-5f, .01f, .02f, .06f, .12f, .94f, 1f, 2f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(.05f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{1f, 2f, 1f, 1f, 0f, 1f, 1f, 1f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-5.05f, 0f, .05f, .1f, .15f, .9f, .95f, 1f, 2.05f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets3()
|
||||
{
|
||||
final float[] values = new float[]{0f, 0f, .02f, .06f, .12f, .94f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f, 1f);
|
||||
Histogram h2 = h.toHistogram(1f, 0f);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected counts match actual counts",
|
||||
new double[]{2f, 4f},
|
||||
h2.getCounts(), 0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-1f, 0f, 1f},
|
||||
h2.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuckets4()
|
||||
{
|
||||
final float[] values = new float[]{0f, 0f, 0.01f, 0.51f, 0.6f,0.8f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0.5f,1f);
|
||||
Histogram h3 = h.toHistogram(0.2f,0);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected counts match actual counts",
|
||||
new double[]{3f,2f,1f},
|
||||
h3.getCounts(),
|
||||
0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"expected breaks match actual breaks",
|
||||
new double[]{-0.2f,0.5f,0.7f,0.9f},
|
||||
h3.getBreaks(), 0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test public void testBuckets5()
|
||||
{
|
||||
final float[] values = new float[]{0.1f,0.5f,0.6f};
|
||||
ApproximateHistogram h = buildHistogram(50, values, 0f,1f);
|
||||
Histogram h4 = h.toHistogram(0.5f,0);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected counts match actual counts",
|
||||
new double[]{2,1},
|
||||
h4.getCounts(),
|
||||
0.1f
|
||||
);
|
||||
|
||||
Assert.assertArrayEquals(
|
||||
"Expected breaks match actual breaks",
|
||||
new double[]{0f,0.5f,1f},
|
||||
h4.getBreaks(),
|
||||
0.1f
|
||||
);
|
||||
}
|
||||
|
||||
@Test public void testEmptyHistogram() {
|
||||
ApproximateHistogram h = new ApproximateHistogram(50);
|
||||
Assert.assertArrayEquals(
|
||||
new float[]{Float.NaN, Float.NaN},
|
||||
h.getQuantiles(new float[]{0.8f, 0.9f}),
|
||||
1e-9f
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Druid - a distributed column store.
|
||||
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
package io.druid.query.aggregation.histogram;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.druid.jackson.DefaultObjectMapper;
|
||||
import org.hamcrest.CoreMatchers;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
public class QuantilesTest
|
||||
{
|
||||
@Test
|
||||
public void testSerialization() throws Exception
|
||||
{
|
||||
ObjectMapper mapper = new DefaultObjectMapper();
|
||||
|
||||
float[] probabilities = new float[]{0.25f, 0.5f, 0.75f};
|
||||
float[] quantiles = new float[]{0.25f, 0.5f, 0.75f};
|
||||
float min = 0f;
|
||||
float max = 4f;
|
||||
|
||||
String theString = mapper.writeValueAsString(
|
||||
new Quantiles(probabilities, quantiles, min, max)
|
||||
);
|
||||
|
||||
Object theObject = mapper.readValue(theString, Object.class);
|
||||
Assert.assertThat(theObject, CoreMatchers.instanceOf(LinkedHashMap.class));
|
||||
|
||||
LinkedHashMap theMap = (LinkedHashMap) theObject;
|
||||
|
||||
ArrayList theProbabilities = (ArrayList<Float>) theMap.get("probabilities");
|
||||
|
||||
Assert.assertEquals(probabilities.length, theProbabilities.size());
|
||||
for (int i = 0; i < theProbabilities.size(); ++i) {
|
||||
Assert.assertEquals(probabilities[i], ((Number) theProbabilities.get(i)).floatValue(), 0.0001f);
|
||||
}
|
||||
|
||||
ArrayList theQuantiles = (ArrayList<Float>) theMap.get("quantiles");
|
||||
|
||||
Assert.assertEquals(quantiles.length, theQuantiles.size());
|
||||
for (int i = 0; i < theQuantiles.size(); ++i) {
|
||||
Assert.assertEquals(quantiles[i], ((Number) theQuantiles.get(i)).floatValue(), 0.0001f);
|
||||
}
|
||||
|
||||
Assert.assertEquals(
|
||||
"serialized min. matches expected min.",
|
||||
min,
|
||||
((Number) theMap.get("min")).floatValue(),
|
||||
0.0001f
|
||||
);
|
||||
Assert.assertEquals(
|
||||
"serialized max. matches expected max.",
|
||||
max,
|
||||
((Number) theMap.get("max")).floatValue(),
|
||||
0.0001f
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue