mirror of https://github.com/apache/druid.git
HllSketch module (#5712)
* HllSketch module
* updated license and imports
* updated package name
* implemented makeAggregateCombiner()
* removed json marks
* style fix
* added module
* removed unnecessary import, side effect of package renaming
* use ThreadLocalRandom
* addressing code review points, mostly formatting and comments
* javadoc
* natural order with nulls
* typo
* factored out raw input value extraction
* singleton
* style fix
* style fix
* use Collections.singletonList instead of Arrays.asList
* suppress warning
This commit is contained in:
parent 9a3195e98c
commit 93345064b5
@@ -15,5 +15,6 @@ druid.extensions.loadList=["druid-datasketches"]
 The following modules are available:
 
 * [Theta sketch](datasketches-theta.html) - approximate distinct counting with set operations (union, intersection and set difference).
-* [Tuple sketch](datasketches-tuple.html) - extension of Theta sketch to support values associated with distinct keys (arrays of numeric values in this specialized implementation)
+* [Tuple sketch](datasketches-tuple.html) - extension of Theta sketch to support values associated with distinct keys (arrays of numeric values in this specialized implementation).
 * [Quantiles sketch](datasketches-quantiles.html) - approximate distribution of comparable values to obtain ranks, quantiles and histograms. This is a specialized implementation for numeric values.
+* [HLL sketch](datasketches-hll.html) - approximate distinct counting using very compact HLL sketch.
@@ -0,0 +1,82 @@
---
layout: doc_page
---

## DataSketches HLL Sketch module

This module provides Druid aggregators for distinct counting based on the HLL sketch from the [datasketches](http://datasketches.github.io/) library. At ingestion time, the aggregator creates HLL sketch objects to be stored in Druid segments. At query time, sketches are read and merged together. By default, the result is the estimate of the number of distinct values presented to the sketch. You can also use a post aggregator to produce a union of sketch columns in the same row.

You can use the HLL sketch aggregator on any column that contains identifiers; it returns the estimated cardinality of that column.

To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file:

```
druid.extensions.loadList=["druid-datasketches"]
```

### Aggregators

```
{
  "type" : "HLLSketchBuild",
  "name" : <output name>,
  "fieldName" : <metric name>,
  "lgK" : <size and accuracy parameter>,
  "tgtHllType" : <target HLL type>
}
```

```
{
  "type" : "HLLSketchMerge",
  "name" : <output name>,
  "fieldName" : <metric name>,
  "lgK" : <size and accuracy parameter>,
  "tgtHllType" : <target HLL type>
}
```

|property|description|required?|
|--------|-----------|---------|
|type|This String should be "HLLSketchBuild" or "HLLSketchMerge".|yes|
|name|A String for the output (result) name of the calculation.|yes|
|fieldName|A String for the name of the input field.|yes|
|lgK|log2 of K, where K is the number of buckets in the sketch. This parameter controls both size and accuracy. Must be an integer from 4 to 21 inclusive (so that K is a power of 2).|no, defaults to 12|
|tgtHllType|The type of the target HLL sketch. Must be "HLL_4", "HLL_6" or "HLL_8".|no, defaults to "HLL_4"|
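For example, here is a minimal sketch of how the two aggregator types are typically paired (the column name `user_id` and the output names are hypothetical): an `HLLSketchBuild` aggregator at ingestion time creates a sketch column, and an `HLLSketchMerge` aggregator over that column at query time merges the stored sketches.

```
{
  "type" : "HLLSketchBuild",
  "name" : "user_id_hll",
  "fieldName" : "user_id",
  "lgK" : 12,
  "tgtHllType" : "HLL_4"
}
```

```
{
  "type" : "HLLSketchMerge",
  "name" : "distinct_users",
  "fieldName" : "user_id_hll",
  "lgK" : 12,
  "tgtHllType" : "HLL_4"
}
```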
### Post Aggregators

#### Estimate with bounds

```
{
  "type" : "HLLSketchEstimateWithBounds",
  "name" : <output name>,
  "field" : <post aggregator that returns an HLL Sketch>,
  "numStdDev" : <number of standard deviations: 1 (default), 2 or 3>
}
```

#### Union

```
{
  "type" : "HLLSketchUnion",
  "name" : <output name>,
  "fields" : <array of post aggregators that return HLL sketches>,
  "lgK" : <log2 of K for the target sketch>,
  "tgtHllType" : <target HLL type>
}
```
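As an illustration (the sketch column names and the standard Druid `fieldAccess` post aggregator used to reference them are assumptions, not part of this patch), a union of two sketch columns wrapped in an estimate with bounds at 2 standard deviations might look like the following; the result is three doubles: estimate, lower bound and upper bound.

```
{
  "type" : "HLLSketchEstimateWithBounds",
  "name" : "distinct_users_estimate",
  "numStdDev" : 2,
  "field" : {
    "type" : "HLLSketchUnion",
    "name" : "users_union",
    "lgK" : 12,
    "tgtHllType" : "HLL_4",
    "fields" : [
      { "type" : "fieldAccess", "fieldName" : "user_id_hll_day1" },
      { "type" : "fieldAccess", "fieldName" : "user_id_hll_day2" }
    ]
  }
}
```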
#### Sketch to string

Human-readable sketch summary for debugging.

```
{
  "type" : "HLLSketchToString",
  "name" : <output name>,
  "field" : <post aggregator that returns an HLL Sketch>
}
```
@@ -0,0 +1,49 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.segment.serde.ComplexMetricExtractor;

public class RawInputValueExtractor implements ComplexMetricExtractor
{

  private static final RawInputValueExtractor EXTRACTOR = new RawInputValueExtractor();

  private RawInputValueExtractor() {}

  public static RawInputValueExtractor getInstance()
  {
    return EXTRACTOR;
  }

  @Override
  public Class<?> extractedClass()
  {
    return Object.class;
  }

  @Override
  public Object extractValue(final InputRow inputRow, final String metricName)
  {
    return inputRow.getRaw(metricName);
  }

}
@@ -0,0 +1,233 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;

import javax.annotation.Nullable;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;
import com.yahoo.sketches.hll.Union;

import org.apache.druid.query.aggregation.AggregateCombiner;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.ObjectAggregateCombiner;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * Base class for both build and merge factories
 * @author Alexander Saydakov
 */
abstract class HllSketchAggregatorFactory extends AggregatorFactory
{

  static final int DEFAULT_LG_K = 12;
  static final TgtHllType DEFAULT_TGT_HLL_TYPE = TgtHllType.HLL_4;

  static final Comparator<HllSketch> COMPARATOR =
      Comparator.nullsFirst(Comparator.comparingDouble(HllSketch::getEstimate));

  private final String name;
  private final String fieldName;
  private final int lgK;
  private final TgtHllType tgtHllType;

  HllSketchAggregatorFactory(
      final String name,
      final String fieldName,
      @Nullable final Integer lgK,
      @Nullable final String tgtHllType
  )
  {
    this.name = Objects.requireNonNull(name);
    this.fieldName = Objects.requireNonNull(fieldName);
    this.lgK = lgK == null ? DEFAULT_LG_K : lgK;
    this.tgtHllType = tgtHllType == null ? DEFAULT_TGT_HLL_TYPE : TgtHllType.valueOf(tgtHllType);
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @JsonProperty
  public String getFieldName()
  {
    return fieldName;
  }

  @JsonProperty
  public int getLgK()
  {
    return lgK;
  }

  @JsonProperty
  public String getTgtHllType()
  {
    return tgtHllType.toString();
  }

  @Override
  public List<String> requiredFields()
  {
    return Collections.singletonList(fieldName);
  }

  /**
   * This is a convoluted way to return a list of input field names this aggregator needs.
   * Currently the returned factories are only used to obtain a field name by calling getName() method.
   */
  @Override
  public List<AggregatorFactory> getRequiredColumns()
  {
    return Collections.singletonList(new HllSketchBuildAggregatorFactory(fieldName, fieldName, lgK, tgtHllType.toString()));
  }

  @Override
  public HllSketch deserialize(final Object object)
  {
    return HllSketchMergeComplexMetricSerde.deserializeSketch(object);
  }

  @Override
  public HllSketch combine(final Object objectA, final Object objectB)
  {
    final Union union = new Union(lgK);
    union.update((HllSketch) objectA);
    union.update((HllSketch) objectB);
    return union.getResult(tgtHllType);
  }

  @Override
  public AggregateCombiner makeAggregateCombiner()
  {
    return new ObjectAggregateCombiner<HllSketch>()
    {
      private final Union union = new Union(lgK);

      @Override
      public void reset(final ColumnValueSelector selector)
      {
        union.reset();
        fold(selector);
      }

      @Override
      public void fold(final ColumnValueSelector selector)
      {
        final HllSketch sketch = (HllSketch) selector.getObject();
        union.update(sketch);
      }

      @Nullable
      @Override
      public HllSketch getObject()
      {
        return union.getResult(tgtHllType);
      }

      @Override
      public Class<HllSketch> classOfObject()
      {
        return HllSketch.class;
      }
    };
  }

  @Override
  public Double finalizeComputation(final Object object)
  {
    final HllSketch sketch = (HllSketch) object;
    return sketch.getEstimate();
  }

  @Override
  public Comparator<HllSketch> getComparator()
  {
    return COMPARATOR;
  }

  @Override
  public AggregatorFactory getCombiningFactory()
  {
    return new HllSketchMergeAggregatorFactory(getName(), getName(), getLgK(), getTgtHllType().toString());
  }

  @Override
  public byte[] getCacheKey()
  {
    return new CacheKeyBuilder(getCacheTypeId()).appendString(name).appendString(fieldName)
        .appendInt(lgK).appendInt(tgtHllType.ordinal()).build();
  }

  @Override
  public boolean equals(final Object object)
  {
    if (this == object) {
      return true;
    }
    if (object == null || !getClass().equals(object.getClass())) {
      return false;
    }
    final HllSketchAggregatorFactory that = (HllSketchAggregatorFactory) object;
    if (!name.equals(that.getName())) {
      return false;
    }
    if (!fieldName.equals(that.getFieldName())) {
      return false;
    }
    if (lgK != that.getLgK()) {
      return false;
    }
    if (!tgtHllType.equals(that.tgtHllType)) {
      return false;
    }
    return true;
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(name, fieldName, lgK, tgtHllType);
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + " {"
        + "name=" + name
        + ", fieldName=" + fieldName
        + ", lgK=" + lgK
        + ", tgtHllType=" + tgtHllType
        + "}";
  }

  protected abstract byte getCacheTypeId();

}
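Both combine() and makeAggregateCombiner() above funnel sketches through a datasketches Union. The following is a minimal standalone sketch of that merge flow, for illustration only and not part of the patch; it uses only the same com.yahoo.sketches.hll calls that appear in this factory.

```
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;
import com.yahoo.sketches.hll.Union;

public class HllUnionExample
{
  public static void main(String[] args)
  {
    final int lgK = 12;

    // two independently built sketches, as they might come from two segments
    final HllSketch sketchA = new HllSketch(lgK, TgtHllType.HLL_4);
    final HllSketch sketchB = new HllSketch(lgK, TgtHllType.HLL_4);
    for (long i = 0; i < 1000; i++) {
      sketchA.update(i);        // ids 0..999
      sketchB.update(i + 500);  // ids 500..1499, overlapping by 500
    }

    // the same merge logic as combine(): union both sketches, then extract the result
    final Union union = new Union(lgK);
    union.update(sketchA);
    union.update(sketchB);
    final HllSketch merged = union.getResult(TgtHllType.HLL_4);

    // the estimate should be close to 1500 distinct ids
    System.out.println(merged.getEstimate());
  }
}
```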
@@ -0,0 +1,117 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;

import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * This aggregator builds sketches from raw data.
 * The input column can contain identifiers of type string, char[], byte[] or any numeric type.
 * @author Alexander Saydakov
 */
public class HllSketchBuildAggregator implements Aggregator
{

  private final ColumnValueSelector<Object> selector;
  private HllSketch sketch;

  public HllSketchBuildAggregator(
      final ColumnValueSelector<Object> selector,
      final int lgK,
      final TgtHllType tgtHllType
  )
  {
    this.selector = selector;
    this.sketch = new HllSketch(lgK, tgtHllType);
  }

  /*
   * This method is synchronized because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently.
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public void aggregate()
  {
    final Object value = selector.getObject();
    if (value == null) {
      return;
    }
    synchronized (this) {
      updateSketch(sketch, value);
    }
  }

  /*
   * This method is synchronized because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently.
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public synchronized Object get()
  {
    return sketch.copy();
  }

  @Override
  public void close()
  {
    sketch = null;
  }

  @Override
  public float getFloat()
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public long getLong()
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  static void updateSketch(final HllSketch sketch, final Object value)
  {
    if (value instanceof Integer || value instanceof Long) {
      sketch.update(((Number) value).longValue());
    } else if (value instanceof Float || value instanceof Double) {
      sketch.update(((Number) value).doubleValue());
    } else if (value instanceof String) {
      sketch.update(((String) value).toCharArray());
    } else if (value instanceof char[]) {
      sketch.update((char[]) value);
    } else if (value instanceof byte[]) {
      sketch.update((byte[]) value);
    } else if (value instanceof int[]) {
      sketch.update((int[]) value);
    } else if (value instanceof long[]) {
      sketch.update((long[]) value);
    } else {
      throw new IAE("Unsupported type " + value.getClass());
    }
  }

}
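To make the type dispatch in updateSketch() concrete, here is a small illustrative snippet, not part of the patch, that feeds a few of the supported raw-value types through it and reads back the estimate. It assumes it lives in the same package, since updateSketch is package-private.

```
package org.apache.druid.query.aggregation.datasketches.hll;

import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;

public class UpdateSketchExample
{
  public static void main(String[] args)
  {
    final HllSketch sketch = new HllSketch(12, TgtHllType.HLL_4);

    // each supported input type is routed to the matching HllSketch.update() overload
    HllSketchBuildAggregator.updateSketch(sketch, "user-1");              // String -> char[]
    HllSketchBuildAggregator.updateSketch(sketch, 42L);                   // Long -> long
    HllSketchBuildAggregator.updateSketch(sketch, 3.14);                  // Double -> double
    HllSketchBuildAggregator.updateSketch(sketch, new byte[] {1, 2, 3});  // byte[]

    // four distinct values were presented, so the estimate should be close to 4
    System.out.println(sketch.getEstimate());
  }
}
```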
@@ -0,0 +1,94 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import javax.annotation.Nullable;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;

import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * This aggregator factory is for building sketches from raw data.
 * The input column can contain identifiers of type string, char[], byte[] or any numeric type.
 * @author Alexander Saydakov
 */
public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory
{

  @JsonCreator
  public HllSketchBuildAggregatorFactory(
      @JsonProperty("name") final String name,
      @JsonProperty("fieldName") final String fieldName,
      @JsonProperty("lgK") @Nullable final Integer lgK,
      @JsonProperty("tgtHllType") @Nullable final String tgtHllType
  )
  {
    super(name, fieldName, lgK, tgtHllType);
  }

  @Override
  public String getTypeName()
  {
    return HllSketchModule.BUILD_TYPE_NAME;
  }

  @Override
  protected byte getCacheTypeId()
  {
    return AggregatorUtil.HLL_SKETCH_BUILD_CACHE_TYPE_ID;
  }

  @Override
  public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory)
  {
    final ColumnValueSelector<Object> selector = columnSelectorFactory.makeColumnValueSelector(getFieldName());
    return new HllSketchBuildAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType()));
  }

  @Override
  public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory)
  {
    final ColumnValueSelector<Object> selector = columnSelectorFactory.makeColumnValueSelector(getFieldName());
    return new HllSketchBuildBufferAggregator(
        selector,
        getLgK(),
        TgtHllType.valueOf(getTgtHllType()),
        getMaxIntermediateSize()
    );
  }

  /**
   * For the HLL_4 sketch type, this value can be exceeded slightly in extremely rare cases.
   * The sketch will request on-heap memory and move there. It is handled in HllSketchBuildBufferAggregator.
   */
  @Override
  public int getMaxIntermediateSize()
  {
    return HllSketch.getMaxUpdatableSerializationBytes(getLgK(), TgtHllType.valueOf(getTgtHllType()));
  }

}
@@ -0,0 +1,184 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.nio.ByteBuffer;
import java.util.IdentityHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;

import com.google.common.util.concurrent.Striped;
import com.yahoo.memory.WritableMemory;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;

import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.segment.ColumnValueSelector;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

/**
 * This aggregator builds sketches from raw data.
 * The input column can contain identifiers of type string, char[], byte[] or any numeric type.
 * @author Alexander Saydakov
 */
public class HllSketchBuildBufferAggregator implements BufferAggregator
{

  // for locking per buffer position (power of 2 to make index computation faster)
  private static final int NUM_STRIPES = 64;

  private final ColumnValueSelector<Object> selector;
  private final int lgK;
  private final TgtHllType tgtHllType;
  private final int size;
  private final IdentityHashMap<ByteBuffer, WritableMemory> memCache = new IdentityHashMap<>();
  private final IdentityHashMap<ByteBuffer, Int2ObjectMap<HllSketch>> sketchCache = new IdentityHashMap<>();
  private final Striped<ReadWriteLock> stripedLock = Striped.readWriteLock(NUM_STRIPES);

  public HllSketchBuildBufferAggregator(
      final ColumnValueSelector<Object> selector,
      final int lgK,
      final TgtHllType tgtHllType,
      final int size
  )
  {
    this.selector = selector;
    this.lgK = lgK;
    this.tgtHllType = tgtHllType;
    this.size = size;
  }

  @Override
  public void init(final ByteBuffer buf, final int position)
  {
    final WritableMemory mem = getMemory(buf).writableRegion(position, size);
    putSketchIntoCache(buf, position, new HllSketch(lgK, tgtHllType, mem));
  }

  /*
   * This method uses locks because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public void aggregate(final ByteBuffer buf, final int position)
  {
    final Object value = selector.getObject();
    if (value == null) {
      return;
    }
    final Lock lock = stripedLock.getAt(lockIndex(position)).writeLock();
    lock.lock();
    try {
      final HllSketch sketch = sketchCache.get(buf).get(position);
      HllSketchBuildAggregator.updateSketch(sketch, value);
    }
    finally {
      lock.unlock();
    }
  }

  /*
   * This method uses locks because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public Object get(final ByteBuffer buf, final int position)
  {
    final Lock lock = stripedLock.getAt(lockIndex(position)).readLock();
    lock.lock();
    try {
      return sketchCache.get(buf).get(position);
    }
    finally {
      lock.unlock();
    }
  }

  @Override
  public void close()
  {
    memCache.clear();
    sketchCache.clear();
  }

  @Override
  public float getFloat(final ByteBuffer buf, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public long getLong(final ByteBuffer buf, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  private WritableMemory getMemory(final ByteBuffer buf)
  {
    return memCache.computeIfAbsent(buf, b -> WritableMemory.wrap(b));
  }

  /**
   * In very rare cases sketches can exceed given memory, request on-heap memory and move there.
   * We need to identify such sketches and reuse the same objects as opposed to wrapping new memory regions.
   */
  @Override
  public void relocate(final int oldPosition, final int newPosition, final ByteBuffer oldBuf, final ByteBuffer newBuf)
  {
    HllSketch sketch = sketchCache.get(oldBuf).get(oldPosition);
    final WritableMemory oldMem = getMemory(oldBuf).writableRegion(oldPosition, size);
    if (sketch.isSameResource(oldMem)) { // sketch has not moved
      final WritableMemory newMem = getMemory(newBuf).writableRegion(newPosition, size);
      sketch = HllSketch.writableWrap(newMem);
    }
    putSketchIntoCache(newBuf, newPosition, sketch);
  }

  private void putSketchIntoCache(final ByteBuffer buf, final int position, final HllSketch sketch)
  {
    final Int2ObjectMap<HllSketch> map = sketchCache.computeIfAbsent(buf, b -> new Int2ObjectOpenHashMap<>());
    map.put(position, sketch);
  }

  /**
   * compute lock index to avoid boxing in Striped.get() call
   * @param position
   * @return index
   */
  static int lockIndex(final int position)
  {
    return smear(position) % NUM_STRIPES;
  }

  /**
   * see https://github.com/google/guava/blob/master/guava/src/com/google/common/util/concurrent/Striped.java#L536-L548
   * @param hashCode
   * @return smeared hashCode
   */
  private static int smear(int hashCode)
  {
    hashCode ^= (hashCode >>> 20) ^ (hashCode >>> 12);
    return hashCode ^ (hashCode >>> 7) ^ (hashCode >>> 4);
  }

}
@@ -0,0 +1,34 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import org.apache.druid.query.aggregation.datasketches.RawInputValueExtractor;
import org.apache.druid.segment.serde.ComplexMetricExtractor;

public class HllSketchBuildComplexMetricSerde extends HllSketchMergeComplexMetricSerde
{

  @Override
  public ComplexMetricExtractor getExtractor()
  {
    return RawInputValueExtractor.getInstance();
  }

}
@@ -0,0 +1,39 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.io.IOException;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.yahoo.sketches.hll.HllSketch;

public class HllSketchJsonSerializer extends JsonSerializer<HllSketch>
{

  @Override
  public void serialize(final HllSketch sketch, final JsonGenerator jgen, final SerializerProvider provider)
      throws IOException
  {
    jgen.writeBinary(sketch.toCompactByteArray());
  }

}
@@ -0,0 +1,98 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import com.yahoo.sketches.hll.Union;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;

import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * This aggregator merges existing sketches.
 * The input column must contain {@link HllSketch}
 * @author Alexander Saydakov
 */
public class HllSketchMergeAggregator implements Aggregator
{

  private final ColumnValueSelector<HllSketch> selector;
  private final TgtHllType tgtHllType;
  private Union union;

  public HllSketchMergeAggregator(
      final ColumnValueSelector<HllSketch> selector,
      final int lgK,
      final TgtHllType tgtHllType
  )
  {
    this.selector = selector;
    this.tgtHllType = tgtHllType;
    this.union = new Union(lgK);
  }

  /*
   * This method is synchronized because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently.
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public void aggregate()
  {
    final HllSketch sketch = selector.getObject();
    if (sketch == null) {
      return;
    }
    synchronized (this) {
      union.update(sketch);
    }
  }

  /*
   * This method is synchronized because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently.
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public synchronized Object get()
  {
    return union.getResult(tgtHllType);
  }

  @Override
  public void close()
  {
    union = null;
  }

  @Override
  public float getFloat()
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public long getLong()
  {
    throw new UnsupportedOperationException("Not implemented");
  }

}
@@ -0,0 +1,92 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import javax.annotation.Nullable;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;
import com.yahoo.sketches.hll.Union;

import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * This aggregator factory is for merging existing sketches.
 * The input column must contain {@link HllSketch}
 * @author Alexander Saydakov
 */
public class HllSketchMergeAggregatorFactory extends HllSketchAggregatorFactory
{

  @JsonCreator
  public HllSketchMergeAggregatorFactory(
      @JsonProperty("name") final String name,
      @JsonProperty("fieldName") final String fieldName,
      @JsonProperty("lgK") @Nullable final Integer lgK,
      @JsonProperty("tgtHllType") @Nullable final String tgtHllType
  )
  {
    super(name, fieldName, lgK, tgtHllType);
  }

  @Override
  public String getTypeName()
  {
    return HllSketchModule.MERGE_TYPE_NAME;
  }

  @Override
  protected byte getCacheTypeId()
  {
    return AggregatorUtil.HLL_SKETCH_MERGE_CACHE_TYPE_ID;
  }

  @Override
  public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory)
  {
    final ColumnValueSelector<HllSketch> selector = columnSelectorFactory.makeColumnValueSelector(getFieldName());
    return new HllSketchMergeAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType()));
  }

  @Override
  public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory)
  {
    final ColumnValueSelector<HllSketch> selector = columnSelectorFactory.makeColumnValueSelector(getFieldName());
    return new HllSketchMergeBufferAggregator(
        selector,
        getLgK(),
        TgtHllType.valueOf(getTgtHllType()),
        getMaxIntermediateSize()
    );
  }

  @Override
  public int getMaxIntermediateSize()
  {
    return Union.getMaxSerializationBytes(getLgK());
  }

}
@@ -0,0 +1,137 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.nio.ByteBuffer;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;

import com.google.common.util.concurrent.Striped;
import com.yahoo.memory.WritableMemory;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;
import com.yahoo.sketches.hll.Union;

import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.segment.ColumnValueSelector;

/**
 * This aggregator merges existing sketches.
 * The input column must contain {@link HllSketch}
 * @author Alexander Saydakov
 */
public class HllSketchMergeBufferAggregator implements BufferAggregator
{

  // for locking per buffer position (power of 2 to make index computation faster)
  private static final int NUM_STRIPES = 64;

  private final ColumnValueSelector<HllSketch> selector;
  private final int lgK;
  private final TgtHllType tgtHllType;
  private final int size;
  private final Striped<ReadWriteLock> stripedLock = Striped.readWriteLock(NUM_STRIPES);

  public HllSketchMergeBufferAggregator(
      final ColumnValueSelector<HllSketch> selector,
      final int lgK,
      final TgtHllType tgtHllType,
      final int size
  )
  {
    this.selector = selector;
    this.lgK = lgK;
    this.tgtHllType = tgtHllType;
    this.size = size;
  }

  @SuppressWarnings("ResultOfObjectAllocationIgnored")
  @Override
  public void init(final ByteBuffer buf, final int position)
  {
    final WritableMemory mem = WritableMemory.wrap(buf).writableRegion(position, size);
    // Not necessary to keep the constructed object since it is cheap to reconstruct by wrapping the memory.
    // The objects are not cached as in BuildBufferAggregator since they never exceed the max size and never move.
    // So it is easier to reconstruct them by wrapping memory then to keep position-to-object mappings.
    new Union(lgK, mem);
  }

  /*
   * This method uses locks because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public void aggregate(final ByteBuffer buf, final int position)
  {
    final HllSketch sketch = selector.getObject();
    if (sketch == null) {
      return;
    }
    final WritableMemory mem = WritableMemory.wrap(buf).writableRegion(position, size);
    final Lock lock = stripedLock.getAt(HllSketchBuildBufferAggregator.lockIndex(position)).writeLock();
    lock.lock();
    try {
      final Union union = Union.writableWrap(mem);
      union.update(sketch);
    }
    finally {
      lock.unlock();
    }
  }

  /*
   * This method uses locks because it can be used during indexing,
   * and Druid can call aggregate() and get() concurrently
   * See https://github.com/druid-io/druid/pull/3956
   */
  @Override
  public Object get(final ByteBuffer buf, final int position)
  {
    final WritableMemory mem = WritableMemory.wrap(buf).writableRegion(position, size);
    final Lock lock = stripedLock.getAt(HllSketchBuildBufferAggregator.lockIndex(position)).readLock();
    lock.lock();
    try {
      final Union union = Union.writableWrap(mem);
      return union.getResult(tgtHllType);
    }
    finally {
      lock.unlock();
    }
  }

  @Override
  public void close()
  {
  }

  @Override
  public float getFloat(final ByteBuffer buf, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public long getLong(final ByteBuffer buf, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

}
@@ -0,0 +1,110 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.apache.commons.codec.binary.Base64;

import com.yahoo.memory.Memory;
import com.yahoo.sketches.hll.HllSketch;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.serde.ComplexColumnPartSupplier;
import org.apache.druid.segment.serde.ComplexMetricExtractor;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;

public class HllSketchMergeComplexMetricSerde extends ComplexMetricSerde
{

  @Override
  public String getTypeName()
  {
    return HllSketchModule.TYPE_NAME; // must be common type name
  }

  @Override
  public ObjectStrategy getObjectStrategy()
  {
    return HllSketchObjectStrategy.STRATEGY;
  }

  @Override
  public ComplexMetricExtractor getExtractor()
  {
    return new ComplexMetricExtractor()
    {
      @Override
      public Class<?> extractedClass()
      {
        return HllSketch.class;
      }

      @Override
      public HllSketch extractValue(final InputRow inputRow, final String metricName)
      {
        final Object object = inputRow.getRaw(metricName);
        if (object == null) {
          return null;
        }
        return deserializeSketch(object);
      }
    };
  }

  @Override
  public void deserializeColumn(final ByteBuffer buf, final ColumnBuilder columnBuilder)
  {
    columnBuilder.setComplexColumn(
        new ComplexColumnPartSupplier(
            getTypeName(),
            GenericIndexed.read(buf, HllSketchObjectStrategy.STRATEGY, columnBuilder.getFileMapper())
        )
    );
  }

  static HllSketch deserializeSketch(final Object object)
  {
    if (object instanceof String) {
      return HllSketch.wrap(Memory.wrap(Base64.decodeBase64(((String) object).getBytes(StandardCharsets.UTF_8))));
    } else if (object instanceof byte[]) {
      return HllSketch.wrap(Memory.wrap((byte[]) object));
    } else if (object instanceof HllSketch) {
      return (HllSketch) object;
    }
    throw new IAE("Object is not of a type that can be deserialized to an HllSketch: " + object.getClass().getName());
  }

  // support large columns
  @Override
  public GenericColumnSerializer getSerializer(final SegmentWriteOutMedium segmentWriteOutMedium, final String column)
  {
    return LargeColumnSupportedComplexColumnSerializer.create(segmentWriteOutMedium, column, this.getObjectStrategy());
  }

}
@@ -0,0 +1,78 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.util.Collections;
import java.util.List;

import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.inject.Binder;
import com.yahoo.sketches.hll.HllSketch;

import org.apache.druid.initialization.DruidModule;
import org.apache.druid.segment.serde.ComplexMetrics;

/**
 * This module is to support count-distinct operations using {@link HllSketch}.
 * See <a href="https://datasketches.github.io/docs/HLL/HLL.html">HyperLogLog Sketch documentation</a>
 * @author Alexander Saydakov
 */
public class HllSketchModule implements DruidModule
{

  public static final String TYPE_NAME = "HLLSketch"; // common type name to be associated with segment data
  public static final String BUILD_TYPE_NAME = "HLLSketchBuild";
  public static final String MERGE_TYPE_NAME = "HLLSketchMerge";
  public static final String TO_STRING_TYPE_NAME = "HLLSketchToString";
  public static final String UNION_TYPE_NAME = "HLLSketchUnion";
  public static final String ESTIMATE_WITH_BOUNDS_TYPE_NAME = "HLLSketchEstimateWithBounds";

  @Override
  public void configure(final Binder binder)
  {
    if (ComplexMetrics.getSerdeForType(TYPE_NAME) == null) {
      ComplexMetrics.registerSerde(TYPE_NAME, new HllSketchMergeComplexMetricSerde());
    }
    if (ComplexMetrics.getSerdeForType(BUILD_TYPE_NAME) == null) {
      ComplexMetrics.registerSerde(BUILD_TYPE_NAME, new HllSketchBuildComplexMetricSerde());
    }
    if (ComplexMetrics.getSerdeForType(MERGE_TYPE_NAME) == null) {
      ComplexMetrics.registerSerde(MERGE_TYPE_NAME, new HllSketchMergeComplexMetricSerde());
    }
  }

  @Override
  public List<? extends Module> getJacksonModules()
  {
    return Collections.singletonList(
        new SimpleModule("HllSketchModule").registerSubtypes(
            new NamedType(HllSketchMergeAggregatorFactory.class, MERGE_TYPE_NAME),
            new NamedType(HllSketchBuildAggregatorFactory.class, BUILD_TYPE_NAME),
            new NamedType(HllSketchMergeAggregatorFactory.class, TYPE_NAME),
            new NamedType(HllSketchToStringPostAggregator.class, TO_STRING_TYPE_NAME),
            new NamedType(HllSketchUnionPostAggregator.class, UNION_TYPE_NAME),
            new NamedType(HllSketchToEstimateWithBoundsPostAggregator.class, ESTIMATE_WITH_BOUNDS_TYPE_NAME)
        ).addSerializer(HllSketch.class, new HllSketchJsonSerializer())
    );
  }

}
@@ -0,0 +1,58 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.nio.ByteBuffer;

import com.yahoo.memory.Memory;
import com.yahoo.sketches.hll.HllSketch;

import org.apache.druid.segment.data.ObjectStrategy;

public class HllSketchObjectStrategy implements ObjectStrategy<HllSketch>
{

  static final HllSketchObjectStrategy STRATEGY = new HllSketchObjectStrategy();

  @Override
  public Class<HllSketch> getClazz()
  {
    return HllSketch.class;
  }

  @Override
  public int compare(final HllSketch sketch1, final HllSketch sketch2)
  {
    return HllSketchAggregatorFactory.COMPARATOR.compare(sketch1, sketch2);
  }

  @Override
  public HllSketch fromByteBuffer(final ByteBuffer buf, final int size)
  {
    return HllSketch.wrap(Memory.wrap(buf).region(buf.position(), size));
  }

  @Override
  public byte[] toBytes(final HllSketch sketch)
  {
    return sketch.toCompactByteArray();
  }

}
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.hll.HllSketch;

import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder;

import java.util.Comparator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import javax.annotation.Nullable;

/**
 * Returns a distinct count estimate and error bounds from a given {@link HllSketch}.
 * The result will be three double values: estimate, lower bound and upper bound.
 * The bounds are provided at a given number of standard deviations (optional, defaults to 1).
 * This must be an integer value of 1, 2 or 3 corresponding to approximately 68.3%, 95.4% and 99.7%
 * confidence intervals.
 * @author Alexander Saydakov
 */
public class HllSketchToEstimateWithBoundsPostAggregator implements PostAggregator
{

  private final String name;
  private final PostAggregator field;
  private final int numStdDevs;

  @JsonCreator
  public HllSketchToEstimateWithBoundsPostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("field") final PostAggregator field,
      @JsonProperty("numStdDev") @Nullable final Integer numStdDevs
  )
  {
    this.name = name;
    this.field = field;
    this.numStdDevs = numStdDevs == null ? 1 : numStdDevs;
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @JsonProperty
  public PostAggregator getField()
  {
    return field;
  }

  @JsonProperty
  public int getNumStdDev()
  {
    return numStdDevs;
  }

  @Override
  public Set<String> getDependentFields()
  {
    return field.getDependentFields();
  }

  @Override
  public Comparator<double[]> getComparator()
  {
    throw new IAE("Comparing arrays of estimates and error bounds is not supported");
  }

  @Override
  public double[] compute(final Map<String, Object> combinedAggregators)
  {
    final HllSketch sketch = (HllSketch) field.compute(combinedAggregators);
    return new double[] {sketch.getEstimate(), sketch.getLowerBound(numStdDevs), sketch.getUpperBound(numStdDevs)};
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> aggregators)
  {
    return this;
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "{" +
        "name='" + name + '\'' +
        ", field=" + field +
        ", numStdDev=" + numStdDevs +
        "}";
  }

  @Override
  public boolean equals(final Object o)
  {
    if (this == o) {
      return true;
    }
    if (!(o instanceof HllSketchToEstimateWithBoundsPostAggregator)) {
      return false;
    }

    final HllSketchToEstimateWithBoundsPostAggregator that = (HllSketchToEstimateWithBoundsPostAggregator) o;

    if (!name.equals(that.name)) {
      return false;
    }
    if (numStdDevs != that.numStdDevs) {
      return false;
    }
    return field.equals(that.field);
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(name, field, numStdDevs);
  }

  @Override
  public byte[] getCacheKey()
  {
    return new CacheKeyBuilder(AggregatorUtil.HLL_SKETCH_TO_ESTIMATE_AND_BOUNDS_CACHE_TYPE_ID)
        .appendString(name)
        .appendCacheable(field)
        .appendInt(numStdDevs)
        .build();
  }

}
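For reference, a minimal sketch of how this post aggregator produces its three-value result outside of a full query. `FieldAccessPostAggregator` is assumed here as a stand-in for whatever post aggregator supplies the sketch, and the input keys are purely illustrative.

```
import com.yahoo.sketches.hll.HllSketch;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchToEstimateWithBoundsPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

import java.util.Collections;
import java.util.Map;

class EstimateWithBoundsExample
{
  public static void main(String[] args)
  {
    // Build a sketch over 1000 distinct keys (illustrative input, not part of the patch).
    final HllSketch sketch = new HllSketch(12);
    for (int key = 0; key < 1000; key++) {
      sketch.update(key);
    }

    // The aggregator map that a query would hand to post aggregators.
    final Map<String, Object> aggregators = Collections.singletonMap("sketch", sketch);

    // numStdDev = 2 asks for roughly a 95.4% confidence interval around the estimate.
    final HllSketchToEstimateWithBoundsPostAggregator postAgg =
        new HllSketchToEstimateWithBoundsPostAggregator(
            "countWithBounds",
            new FieldAccessPostAggregator("f", "sketch"),
            2
        );

    final double[] result = postAgg.compute(aggregators);
    System.out.printf("estimate=%f lower=%f upper=%f%n", result[0], result[1], result[2]);
  }
}
```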
@@ -0,0 +1,139 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.hll.HllSketch;

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder;

import java.util.Comparator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Returns a human-readable summary of a given {@link HllSketch}.
 * This is the string returned by the toString() method of the sketch.
 * This can be useful for debugging.
 * @author Alexander Saydakov
 */
public class HllSketchToStringPostAggregator implements PostAggregator
{

  private final String name;
  private final PostAggregator field;

  @JsonCreator
  public HllSketchToStringPostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("field") final PostAggregator field
  )
  {
    this.name = name;
    this.field = field;
  }

  @Override
  public Set<String> getDependentFields()
  {
    return field.getDependentFields();
  }

  @Override
  public Comparator<String> getComparator()
  {
    return Comparator.nullsFirst(Comparator.naturalOrder());
  }

  @Override
  public String compute(final Map<String, Object> combinedAggregators)
  {
    final HllSketch sketch = (HllSketch) field.compute(combinedAggregators);
    return sketch.toString();
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> aggregators)
  {
    return this;
  }

  @JsonProperty
  public PostAggregator getField()
  {
    return field;
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "{" +
        "name='" + name + '\'' +
        ", field=" + field +
        "}";
  }

  @Override
  public boolean equals(final Object o)
  {
    if (this == o) {
      return true;
    }
    if (!(o instanceof HllSketchToStringPostAggregator)) {
      return false;
    }

    final HllSketchToStringPostAggregator that = (HllSketchToStringPostAggregator) o;

    if (!name.equals(that.name)) {
      return false;
    }

    return field.equals(that.field);
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(name, field);
  }

  @Override
  public byte[] getCacheKey()
  {
    return new CacheKeyBuilder(AggregatorUtil.HLL_SKETCH_TO_STRING_CACHE_TYPE_ID)
        .appendString(name)
        .appendCacheable(field)
        .build();
  }

}
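A similar sketch for the summary post aggregator, again assuming `FieldAccessPostAggregator` to pull the sketch out of the aggregator map; the printed text is whatever `HllSketch#toString()` reports for the sketch.

```
import com.yahoo.sketches.hll.HllSketch;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchToStringPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

import java.util.Collections;
import java.util.Map;

class SketchSummaryExample
{
  public static void main(String[] args)
  {
    final HllSketch sketch = new HllSketch(12);
    for (int key = 0; key < 1000; key++) {
      sketch.update(key);
    }
    final Map<String, Object> aggregators = Collections.singletonMap("sketch", sketch);

    // Prints the sketch's own toString() report, which is handy when debugging query results.
    final HllSketchToStringPostAggregator summary =
        new HllSketchToStringPostAggregator("summary", new FieldAccessPostAggregator("f", "sketch"));
    System.out.println(summary.compute(aggregators));
  }
}
```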
@@ -0,0 +1,182 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Sets;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.TgtHllType;
import com.yahoo.sketches.hll.Union;

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import javax.annotation.Nullable;

/**
 * Returns a union of a given list of sketches.
 * @author Alexander Saydakov
 */
public class HllSketchUnionPostAggregator implements PostAggregator
{

  private final String name;
  private final List<PostAggregator> fields;
  private final int lgK;
  private final TgtHllType tgtHllType;

  @JsonCreator
  public HllSketchUnionPostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("fields") final List<PostAggregator> fields,
      @JsonProperty("lgK") @Nullable final Integer lgK,
      @JsonProperty("tgtHllType") @Nullable final String tgtHllType
  )
  {
    this.name = name;
    this.fields = fields;
    this.lgK = lgK == null ? HllSketchAggregatorFactory.DEFAULT_LG_K : lgK;
    this.tgtHllType = tgtHllType == null ? HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE
        : TgtHllType.valueOf(tgtHllType);
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @JsonProperty
  public List<PostAggregator> getFields()
  {
    return fields;
  }

  @JsonProperty
  public int getLgK()
  {
    return lgK;
  }

  @JsonProperty
  public String getTgtHllType()
  {
    return tgtHllType.toString();
  }

  @Override
  public Set<String> getDependentFields()
  {
    final Set<String> dependentFields = Sets.newLinkedHashSet();
    for (final PostAggregator field : fields) {
      dependentFields.addAll(field.getDependentFields());
    }
    return dependentFields;
  }

  @Override
  public Comparator<HllSketch> getComparator()
  {
    return HllSketchAggregatorFactory.COMPARATOR;
  }

  @Override
  public HllSketch compute(final Map<String, Object> combinedAggregators)
  {
    final Union union = new Union(lgK);
    for (final PostAggregator field : fields) {
      final HllSketch sketch = (HllSketch) field.compute(combinedAggregators);
      union.update(sketch);
    }
    return union.getResult(tgtHllType);
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> aggregators)
  {
    return this;
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "{" +
        "name='" + name + '\'' +
        ", fields=" + fields +
        ", lgK=" + lgK +
        ", tgtHllType=" + tgtHllType +
        "}";
  }

  @Override
  public boolean equals(final Object o)
  {
    if (this == o) {
      return true;
    }
    if (!(o instanceof HllSketchUnionPostAggregator)) {
      return false;
    }

    final HllSketchUnionPostAggregator that = (HllSketchUnionPostAggregator) o;

    if (!name.equals(that.name)) {
      return false;
    }
    if (!fields.equals(that.fields)) {
      return false;
    }
    if (lgK != that.getLgK()) {
      return false;
    }
    if (!tgtHllType.equals(that.tgtHllType)) {
      return false;
    }
    return true;
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(name, fields, lgK, tgtHllType);
  }

  @Override
  public byte[] getCacheKey()
  {
    return new CacheKeyBuilder(AggregatorUtil.HLL_SKETCH_UNION_CACHE_TYPE_ID)
        .appendString(name)
        .appendCacheablesIgnoringOrder(fields)
        .appendInt(lgK)
        .appendInt(tgtHllType.ordinal())
        .build();
  }

}
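A minimal sketch of the union post aggregator at work, assuming `FieldAccessPostAggregator` and two overlapping, purely illustrative key ranges; passing `null` for `lgK` and `tgtHllType` falls back to the factory defaults, as in the constructor above.

```
import com.yahoo.sketches.hll.HllSketch;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchUnionPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

class UnionExample
{
  public static void main(String[] args)
  {
    final HllSketch a = new HllSketch(12);
    final HllSketch b = new HllSketch(12);
    for (int key = 0; key < 1000; key++) {
      a.update(key);        // keys 0..999
      b.update(key + 500);  // keys 500..1499, overlapping by 500
    }

    final Map<String, Object> aggregators = new HashMap<>();
    aggregators.put("sketchA", a);
    aggregators.put("sketchB", b);

    final HllSketchUnionPostAggregator union = new HllSketchUnionPostAggregator(
        "union",
        Arrays.asList(
            new FieldAccessPostAggregator("a", "sketchA"),
            new FieldAccessPostAggregator("b", "sketchB")
        ),
        null,
        null
    );

    // Expect an estimate close to 1500 distinct keys across both columns.
    System.out.println(union.compute(aggregators).getEstimate());
  }
}
```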
@@ -19,7 +19,7 @@
 
 package org.apache.druid.query.aggregation.datasketches.theta;
 
-import org.apache.druid.data.input.InputRow;
+import org.apache.druid.query.aggregation.datasketches.RawInputValueExtractor;
 import org.apache.druid.segment.serde.ComplexMetricExtractor;
 
 /**
@@ -29,20 +29,6 @@ public class SketchBuildComplexMetricSerde extends SketchMergeComplexMetricSerde
   @Override
   public ComplexMetricExtractor getExtractor()
   {
-    return new ComplexMetricExtractor()
-    {
-
-      @Override
-      public Class<?> extractedClass()
-      {
-        return Object.class;
-      }
-
-      @Override
-      public Object extractValue(InputRow inputRow, String metricName)
-      {
-        return inputRow.getRaw(metricName);
-      }
-    };
+    return RawInputValueExtractor.getInstance();
   }
 }
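The anonymous extractor removed above is factored out into a shared `RawInputValueExtractor` singleton (the new file itself is not shown in this hunk). Based on the removed code, it would look roughly like the following sketch:

```
package org.apache.druid.query.aggregation.datasketches;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.segment.serde.ComplexMetricExtractor;

// Singleton ComplexMetricExtractor that simply hands back the raw input value,
// mirroring the anonymous classes removed from the two serde classes above.
public class RawInputValueExtractor implements ComplexMetricExtractor
{
  private static final RawInputValueExtractor EXTRACTOR = new RawInputValueExtractor();

  private RawInputValueExtractor() {}

  public static RawInputValueExtractor getInstance()
  {
    return EXTRACTOR;
  }

  @Override
  public Class<?> extractedClass()
  {
    return Object.class;
  }

  @Override
  public Object extractValue(final InputRow inputRow, final String metricName)
  {
    return inputRow.getRaw(metricName);
  }
}
```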
@@ -19,7 +19,7 @@
 
 package org.apache.druid.query.aggregation.datasketches.tuple;
 
-import org.apache.druid.data.input.InputRow;
+import org.apache.druid.query.aggregation.datasketches.RawInputValueExtractor;
 import org.apache.druid.segment.serde.ComplexMetricExtractor;
 
 public class ArrayOfDoublesSketchBuildComplexMetricSerde extends ArrayOfDoublesSketchMergeComplexMetricSerde
@@ -28,20 +28,7 @@ public class ArrayOfDoublesSketchBuildComplexMetricSerde extends ArrayOfDoublesS
   @Override
   public ComplexMetricExtractor getExtractor()
   {
-    return new ComplexMetricExtractor()
-    {
-      @Override
-      public Class<?> extractedClass()
-      {
-        return Object.class;
-      }
-
-      @Override
-      public Object extractValue(final InputRow inputRow, final String metricName)
-      {
-        return inputRow.getRaw(metricName);
-      }
-    };
+    return RawInputValueExtractor.getInstance();
   }
 
 }
@@ -2,3 +2,4 @@ org.apache.druid.query.aggregation.datasketches.theta.SketchModule
 org.apache.druid.query.aggregation.datasketches.theta.oldapi.OldApiSketchModule
 org.apache.druid.query.aggregation.datasketches.quantiles.DoublesSketchModule
 org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchModule
+org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule
@@ -0,0 +1,86 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.io.BufferedWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.commons.codec.binary.Base64;

import com.yahoo.sketches.hll.HllSketch;

// This is used for generating test data for HllSketchAggregatorTest
class GenerateTestData
{

  public static void main(String[] args) throws Exception
  {
    generateSketches();
  }

  private static void generateSketches() throws Exception
  {
    int lgK = 12;
    String date = "20170101";
    Path rawPath = FileSystems.getDefault().getPath("hll_raw.tsv");
    Path sketchPath = FileSystems.getDefault().getPath("hll_sketches.tsv");
    try (BufferedWriter out1 = Files.newBufferedWriter(rawPath, StandardCharsets.UTF_8)) {
      try (BufferedWriter out2 = Files.newBufferedWriter(sketchPath, StandardCharsets.UTF_8)) {
        Random rand = ThreadLocalRandom.current();
        int key = 0;
        for (int i = 0; i < 100; i++) {
          HllSketch sketch = new HllSketch(lgK);
          String dimension = Integer.toString(rand.nextInt(10) + 1);
          writeRawRecord(out1, date, dimension, key);
          sketch.update(key++);
          writeRawRecord(out1, date, dimension, key);
          sketch.update(key++);
          writeSketchRecord(out2, date, dimension, sketch);
        }
      }
    }
  }

  private static void writeRawRecord(BufferedWriter out, String date, String dimension, int id) throws Exception
  {
    out.write(date);
    out.write("\t");
    out.write(dimension);
    out.write("\t");
    out.write(Integer.toString(id));
    out.newLine();
  }

  private static void writeSketchRecord(BufferedWriter out, String date, String dimension, HllSketch sketch) throws Exception
  {
    out.write(date);
    out.write("\t");
    out.write(dimension);
    out.write("\t");
    out.write(Base64.encodeBase64String(sketch.toCompactByteArray()));
    out.newLine();
  }

}
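A hypothetical round-trip check for the generated data, assuming `HllSketch.heapify(byte[])` and commons-codec `Base64` for decoding: each line of `hll_sketches.tsv` should decode to a sketch whose estimate is close to 2, since every generated sketch saw exactly two distinct keys.

```
import com.yahoo.sketches.hll.HllSketch;
import org.apache.commons.codec.binary.Base64;

import java.io.BufferedReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;

// Reads hll_sketches.tsv back and prints each sketch's estimate for a quick sanity check.
class VerifyTestData
{
  public static void main(String[] args) throws Exception
  {
    try (BufferedReader in = Files.newBufferedReader(
        FileSystems.getDefault().getPath("hll_sketches.tsv"), StandardCharsets.UTF_8)) {
      String line;
      while ((line = in.readLine()) != null) {
        final String[] fields = line.split("\t");
        final HllSketch sketch = HllSketch.heapify(Base64.decodeBase64(fields[2]));
        System.out.println(fields[1] + "\t" + sketch.getEstimate());
      }
    }
  }
}
```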
@@ -0,0 +1,195 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation.datasketches.hll;

import java.io.File;
import java.util.Collection;
import java.util.List;

import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import com.google.common.collect.Lists;

import org.apache.druid.data.input.Row;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryRunnerTest;

@RunWith(Parameterized.class)
public class HllSketchAggregatorTest
{
  private final AggregationTestHelper helper;

  @Rule
  public final TemporaryFolder tempFolder = new TemporaryFolder();

  public HllSketchAggregatorTest(GroupByQueryConfig config)
  {
    DruidModule module = new HllSketchModule();
    module.configure(null);
    helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
        module.getJacksonModules(), config, tempFolder);
  }

  @Parameterized.Parameters(name = "{0}")
  public static Collection<?> constructorFeeder()
  {
    final List<Object[]> constructors = Lists.newArrayList();
    for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) {
      constructors.add(new Object[] {config});
    }
    return constructors;
  }

  @Test
  public void ingestSketches() throws Exception
  {
    Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
        new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()),
        String.join("\n",
            "{",
            "  \"type\": \"string\",",
            "  \"parseSpec\": {",
            "    \"format\": \"tsv\",",
            "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMdd\"},",
            "    \"dimensionsSpec\": {",
            "      \"dimensions\": [\"dim\"],",
            "      \"dimensionExclusions\": [],",
            "      \"spatialDimensions\": []",
            "    },",
            "    \"columns\": [\"timestamp\", \"dim\", \"sketch\"]",
            "  }",
            "}"),
        String.join("\n",
            "[",
            "  {\"type\": \"HLLSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\"}",
            "]"),
        0, // minTimestamp
        Granularities.NONE,
        200, // maxRowCount
        String.join("\n",
            "{",
            "  \"queryType\": \"groupBy\",",
            "  \"dataSource\": \"test_datasource\",",
            "  \"granularity\": \"ALL\",",
            "  \"dimensions\": [],",
            "  \"aggregations\": [",
            "    {\"type\": \"HLLSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\"}",
            "  ],",
            "  \"intervals\": [\"2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z\"]",
            "}"));
    List<Row> results = seq.toList();
    Assert.assertEquals(1, results.size());
    Row row = results.get(0);
    Assert.assertEquals(200, (double) row.getMetric("sketch"), 0.1);
  }

  @Test
  public void buildSketchesAtIngestionTime() throws Exception
  {
    Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
        new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()),
        String.join("\n",
            "{",
            "  \"type\": \"string\",",
            "  \"parseSpec\": {",
            "    \"format\": \"tsv\",",
            "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMdd\"},",
            "    \"dimensionsSpec\": {",
            "      \"dimensions\": [\"dim\"],",
            "      \"dimensionExclusions\": [],",
            "      \"spatialDimensions\": []",
            "    },",
            "    \"columns\": [\"timestamp\", \"dim\", \"id\"]",
            "  }",
            "}"),
        String.join("\n",
            "[",
            "  {\"type\": \"HLLSketchBuild\", \"name\": \"sketch\", \"fieldName\": \"id\"}",
            "]"),
        0, // minTimestamp
        Granularities.NONE,
        200, // maxRowCount
        String.join("\n",
            "{",
            "  \"queryType\": \"groupBy\",",
            "  \"dataSource\": \"test_datasource\",",
            "  \"granularity\": \"ALL\",",
            "  \"dimensions\": [],",
            "  \"aggregations\": [",
            "    {\"type\": \"HLLSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\"}",
            "  ],",
            "  \"intervals\": [\"2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z\"]",
            "}"));
    List<Row> results = seq.toList();
    Assert.assertEquals(1, results.size());
    Row row = results.get(0);
    Assert.assertEquals(200, (double) row.getMetric("sketch"), 0.1);
  }

  @Test
  public void buildSketchesAtQueryTime() throws Exception
  {
    Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
        new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile()),
        String.join("\n",
            "{",
            "  \"type\": \"string\",",
            "  \"parseSpec\": {",
            "    \"format\": \"tsv\",",
            "    \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMdd\"},",
            "    \"dimensionsSpec\": {",
            "      \"dimensions\": [\"dim\", \"id\"],",
            "      \"dimensionExclusions\": [],",
            "      \"spatialDimensions\": []",
            "    },",
            "    \"columns\": [\"timestamp\", \"dim\", \"id\"]",
            "  }",
            "}"),
        "[]",
        0, // minTimestamp
        Granularities.NONE,
        200, // maxRowCount
        String.join("\n",
            "{",
            "  \"queryType\": \"groupBy\",",
            "  \"dataSource\": \"test_datasource\",",
            "  \"granularity\": \"ALL\",",
            "  \"dimensions\": [],",
            "  \"aggregations\": [",
            "    {\"type\": \"HLLSketchBuild\", \"name\": \"sketch\", \"fieldName\": \"id\"}",
            "  ],",
            "  \"intervals\": [\"2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z\"]",
            "}"));
    List<Row> results = seq.toList();
    Assert.assertEquals(1, results.size());
    Row row = results.get(0);
    Assert.assertEquals(200, (double) row.getMetric("sketch"), 0.1);
  }

}
@@ -0,0 +1,200 @@
20170101 7 0
|
||||
20170101 7 1
|
||||
20170101 9 2
|
||||
20170101 9 3
|
||||
20170101 4 4
|
||||
20170101 4 5
|
||||
20170101 1 6
|
||||
20170101 1 7
|
||||
20170101 1 8
|
||||
20170101 1 9
|
||||
20170101 10 10
|
||||
20170101 10 11
|
||||
20170101 9 12
|
||||
20170101 9 13
|
||||
20170101 5 14
|
||||
20170101 5 15
|
||||
20170101 3 16
|
||||
20170101 3 17
|
||||
20170101 4 18
|
||||
20170101 4 19
|
||||
20170101 9 20
|
||||
20170101 9 21
|
||||
20170101 1 22
|
||||
20170101 1 23
|
||||
20170101 9 24
|
||||
20170101 9 25
|
||||
20170101 10 26
|
||||
20170101 10 27
|
||||
20170101 3 28
|
||||
20170101 3 29
|
||||
20170101 10 30
|
||||
20170101 10 31
|
||||
20170101 10 32
|
||||
20170101 10 33
|
||||
20170101 2 34
|
||||
20170101 2 35
|
||||
20170101 10 36
|
||||
20170101 10 37
|
||||
20170101 9 38
|
||||
20170101 9 39
|
||||
20170101 1 40
|
||||
20170101 1 41
|
||||
20170101 5 42
|
||||
20170101 5 43
|
||||
20170101 7 44
|
||||
20170101 7 45
|
||||
20170101 7 46
|
||||
20170101 7 47
|
||||
20170101 7 48
|
||||
20170101 7 49
|
||||
20170101 9 50
|
||||
20170101 9 51
|
||||
20170101 5 52
|
||||
20170101 5 53
|
||||
20170101 6 54
|
||||
20170101 6 55
|
||||
20170101 6 56
|
||||
20170101 6 57
|
||||
20170101 5 58
|
||||
20170101 5 59
|
||||
20170101 3 60
|
||||
20170101 3 61
|
||||
20170101 3 62
|
||||
20170101 3 63
|
||||
20170101 2 64
|
||||
20170101 2 65
|
||||
20170101 6 66
|
||||
20170101 6 67
|
||||
20170101 3 68
|
||||
20170101 3 69
|
||||
20170101 2 70
|
||||
20170101 2 71
|
||||
20170101 3 72
|
||||
20170101 3 73
|
||||
20170101 8 74
|
||||
20170101 8 75
|
||||
20170101 4 76
|
||||
20170101 4 77
|
||||
20170101 6 78
|
||||
20170101 6 79
|
||||
20170101 10 80
|
||||
20170101 10 81
|
||||
20170101 5 82
|
||||
20170101 5 83
|
||||
20170101 1 84
|
||||
20170101 1 85
|
||||
20170101 3 86
|
||||
20170101 3 87
|
||||
20170101 9 88
|
||||
20170101 9 89
|
||||
20170101 2 90
|
||||
20170101 2 91
|
||||
20170101 9 92
|
||||
20170101 9 93
|
||||
20170101 4 94
|
||||
20170101 4 95
|
||||
20170101 2 96
|
||||
20170101 2 97
|
||||
20170101 5 98
|
||||
20170101 5 99
|
||||
20170101 8 100
|
||||
20170101 8 101
|
||||
20170101 8 102
|
||||
20170101 8 103
|
||||
20170101 7 104
|
||||
20170101 7 105
|
||||
20170101 6 106
|
||||
20170101 6 107
|
||||
20170101 1 108
|
||||
20170101 1 109
|
||||
20170101 5 110
|
||||
20170101 5 111
|
||||
20170101 5 112
|
||||
20170101 5 113
|
||||
20170101 2 114
|
||||
20170101 2 115
|
||||
20170101 1 116
|
||||
20170101 1 117
|
||||
20170101 5 118
|
||||
20170101 5 119
|
||||
20170101 10 120
|
||||
20170101 10 121
|
||||
20170101 8 122
|
||||
20170101 8 123
|
||||
20170101 10 124
|
||||
20170101 10 125
|
||||
20170101 4 126
|
||||
20170101 4 127
|
||||
20170101 8 128
|
||||
20170101 8 129
|
||||
20170101 2 130
|
||||
20170101 2 131
|
||||
20170101 9 132
|
||||
20170101 9 133
|
||||
20170101 2 134
|
||||
20170101 2 135
|
||||
20170101 8 136
|
||||
20170101 8 137
|
||||
20170101 1 138
|
||||
20170101 1 139
|
||||
20170101 2 140
|
||||
20170101 2 141
|
||||
20170101 3 142
|
||||
20170101 3 143
|
||||
20170101 8 144
|
||||
20170101 8 145
|
||||
20170101 6 146
|
||||
20170101 6 147
|
||||
20170101 3 148
|
||||
20170101 3 149
|
||||
20170101 1 150
|
||||
20170101 1 151
|
||||
20170101 4 152
|
||||
20170101 4 153
|
||||
20170101 7 154
|
||||
20170101 7 155
|
||||
20170101 6 156
|
||||
20170101 6 157
|
||||
20170101 6 158
|
||||
20170101 6 159
|
||||
20170101 10 160
|
||||
20170101 10 161
|
||||
20170101 7 162
|
||||
20170101 7 163
|
||||
20170101 8 164
|
||||
20170101 8 165
|
||||
20170101 2 166
|
||||
20170101 2 167
|
||||
20170101 6 168
|
||||
20170101 6 169
|
||||
20170101 2 170
|
||||
20170101 2 171
|
||||
20170101 7 172
|
||||
20170101 7 173
|
||||
20170101 8 174
|
||||
20170101 8 175
|
||||
20170101 3 176
|
||||
20170101 3 177
|
||||
20170101 1 178
|
||||
20170101 1 179
|
||||
20170101 10 180
|
||||
20170101 10 181
|
||||
20170101 3 182
|
||||
20170101 3 183
|
||||
20170101 6 184
|
||||
20170101 6 185
|
||||
20170101 5 186
|
||||
20170101 5 187
|
||||
20170101 3 188
|
||||
20170101 3 189
|
||||
20170101 9 190
|
||||
20170101 9 191
|
||||
20170101 6 192
|
||||
20170101 6 193
|
||||
20170101 9 194
|
||||
20170101 9 195
|
||||
20170101 5 196
|
||||
20170101 5 197
|
||||
20170101 2 198
|
||||
20170101 2 199
|
|
|
@@ -0,0 +1,100 @@
20170101 7 AgEHDAMIAgDL18IEK/L7Bg==
|
||||
20170101 9 AgEHDAMIAgCGL/kNdYFmBw==
|
||||
20170101 4 AgEHDAMIAgCBvF0Ge2XmCA==
|
||||
20170101 1 AgEHDAMIAgD8LUIKwekXBQ==
|
||||
20170101 1 AgEHDAMIAgDSFnMHNKJhDg==
|
||||
20170101 10 AgEHDAMIAgCwW0YS9nHyBg==
|
||||
20170101 9 AgEHDAMIAgBGSrcErjyIEQ==
|
||||
20170101 5 AgEHDAMIAgB8dLkH21ItBA==
|
||||
20170101 3 AgEHDAMIAgBuxTQGzvBbHw==
|
||||
20170101 4 AgEHDAMIAgC4P/kHw91RBA==
|
||||
20170101 9 AgEHDAMIAgDIeiQExLWfBw==
|
||||
20170101 1 AgEHDAMIAgA2RwkHnuSbGA==
|
||||
20170101 9 AgEHDAMIAgC4VqkM7y33Bw==
|
||||
20170101 10 AgEHDAMIAgAiO+sFxhlqBA==
|
||||
20170101 3 AgEHDAMIAgA1qTEEWX/UDQ==
|
||||
20170101 10 AgEHDAMIAgCXu2AajcSJCQ==
|
||||
20170101 10 AgEHDAMIAgCeLWoHzqDvBQ==
|
||||
20170101 2 AgEHDAMIAgCqc4cWhGnMBQ==
|
||||
20170101 10 AgEHDAMIAgBt5R0Hti8JBg==
|
||||
20170101 9 AgEHDAMIAgCUB8IEMiViBQ==
|
||||
20170101 1 AgEHDAMIAgCTVDEFAiK0BA==
|
||||
20170101 5 AgEHDAMIAgAavNAFj8OwBg==
|
||||
20170101 7 AgEHDAMIAgDXeDQGMENfDg==
|
||||
20170101 7 AgEHDAMIAgCheuQJP5BIBA==
|
||||
20170101 7 AgEHDAMIAgAsRgUGxmI0Bw==
|
||||
20170101 9 AgEHDAMIAgDDnJofYZmnBw==
|
||||
20170101 5 AgEHDAMIAgCFiqsL1VKVCQ==
|
||||
20170101 6 AgEHDAMIAgDnUe4E2xeaCg==
|
||||
20170101 6 AgEHDAMIAgCwPp0H0U97BQ==
|
||||
20170101 5 AgEHDAMIAgCyqLgGqj5jBw==
|
||||
20170101 3 AgEHDAMIAgBkPRYFjUmsBQ==
|
||||
20170101 3 AgEHDAMIAgDnz5UX753zEA==
|
||||
20170101 2 AgEHDAMIAgCj85wK1ZcUBw==
|
||||
20170101 6 AgEHDAMIAgBkE7cG3Z96Cw==
|
||||
20170101 3 AgEHDAMIAgD8kLYF7eg9BQ==
|
||||
20170101 2 AgEHDAMIAgDVIhcT4DFZEA==
|
||||
20170101 3 AgEHDAMIAgAvGhMHFUS2Bw==
|
||||
20170101 8 AgEHDAMIAgDyeFwMqTu1Bw==
|
||||
20170101 4 AgEHDAMIAgDe2xkLtUytDw==
|
||||
20170101 6 AgEHDAMIAgDppI4TO2muBA==
|
||||
20170101 10 AgEHDAMIAgCdCmwPh7aqBA==
|
||||
20170101 5 AgEHDAMIAgBqQDsN4K6sCA==
|
||||
20170101 1 AgEHDAMIAgCUwyEEmog3Bw==
|
||||
20170101 3 AgEHDAMIAgAgZuMSvhmBBQ==
|
||||
20170101 9 AgEHDAMIAgCkeLMHu8kBBQ==
|
||||
20170101 2 AgEHDAMIAgDUi4wEmUlCBA==
|
||||
20170101 9 AgEHDAMIAgBl34oQH7oiCA==
|
||||
20170101 4 AgEHDAMIAgBvy0oHqRolBw==
|
||||
20170101 2 AgEHDAMIAgDfVngR3FoBBg==
|
||||
20170101 5 AgEHDAMIAgANX3oJBnNlBA==
|
||||
20170101 8 AgEHDAMIAgCJXbIMSe1qBg==
|
||||
20170101 8 AgEHDAMIAgDmC6MLGbKkCQ==
|
||||
20170101 7 AgEHDAMIAgDaXBIHJM/ODQ==
|
||||
20170101 6 AgEHDAMIAgDytqcL4O1JBw==
|
||||
20170101 1 AgEHDAMIAgA5iygH14ydDw==
|
||||
20170101 5 AgEHDAMIAgC+9PgGrbfMCA==
|
||||
20170101 5 AgEHDAMIAgBTYroEz+FIBA==
|
||||
20170101 2 AgEHDAMIAgD8mjERJV6mCQ==
|
||||
20170101 1 AgEHDAMIAgCXvygMqE7tBA==
|
||||
20170101 5 AgEHDAMIAgB7iqkJe8v2Bw==
|
||||
20170101 10 AgEHDAMIAgDhpXQGvSBSDg==
|
||||
20170101 8 AgEHDAMIAgCKYmgWw7A4BA==
|
||||
20170101 10 AgEHDAMIAgB9IzwK+a6eDw==
|
||||
20170101 4 AgEHDAMIAgDCcc8FL1N2Ew==
|
||||
20170101 8 AgEHDAMIAgAgqRkO6dIoBQ==
|
||||
20170101 2 AgEHDAMIAgBTg9oHmrtjBA==
|
||||
20170101 9 AgEHDAMIAgA3rOcHqVgzCQ==
|
||||
20170101 2 AgEHDAMIAgAUx4gPY+ISCA==
|
||||
20170101 8 AgEHDAMIAgDZG0oIywPsBg==
|
||||
20170101 1 AgEHDAMIAgD7acMG1DsKBw==
|
||||
20170101 2 AgEHDAMIAgCwEtsGuX8+BQ==
|
||||
20170101 3 AgEHDAMIAgD0Pd8PzklzBQ==
|
||||
20170101 8 AgEHDAMIAgAbIqkIFwV7BA==
|
||||
20170101 6 AgEHDAMIAgCkb6sG1s/xBg==
|
||||
20170101 3 AgEHDAMIAgDTE2cFR9azDQ==
|
||||
20170101 1 AgEHDAMIAgAqOfEG8nwcBg==
|
||||
20170101 4 AgEHDAMIAgD00/IL3fbbBg==
|
||||
20170101 7 AgEHDAMIAgA2KrIECxROBA==
|
||||
20170101 6 AgEHDAMIAgD16WYNk+yiCQ==
|
||||
20170101 6 AgEHDAMIAgBxQ8gJXnq7DA==
|
||||
20170101 10 AgEHDAMIAgDFsaYRga8yBA==
|
||||
20170101 7 AgEHDAMIAgC4AfwSdF5pCg==
|
||||
20170101 8 AgEHDAMIAgDSfIwRH3iCBw==
|
||||
20170101 2 AgEHDAMIAgCIJ6AETvjiBQ==
|
||||
20170101 6 AgEHDAMIAgAwRVkLPtleBg==
|
||||
20170101 2 AgEHDAMIAgBwyXYHn6ugDA==
|
||||
20170101 7 AgEHDAMIAgCUfqEUQPsNDg==
|
||||
20170101 8 AgEHDAMIAgC/zjYUuOxvCQ==
|
||||
20170101 3 AgEHDAMIAgAFiFMPBkJKCA==
|
||||
20170101 1 AgEHDAMIAgAu5AsOgZ6FBw==
|
||||
20170101 10 AgEHDAMIAgB532UEFy4gBg==
|
||||
20170101 3 AgEHDAMIAgDKjF4FQdfXCA==
|
||||
20170101 6 AgEHDAMIAgCTxHYFDF8FBQ==
|
||||
20170101 5 AgEHDAMIAgARy64GgMHzCQ==
|
||||
20170101 3 AgEHDAMIAgBrzP4EBn3vEQ==
|
||||
20170101 9 AgEHDAMIAgCmEVEEdwL+Bw==
|
||||
20170101 6 AgEHDAMIAgBUe0cPtxnPBw==
|
||||
20170101 9 AgEHDAMIAgDNWtkMSizzBA==
|
||||
20170101 5 AgEHDAMIAgDzofoJO729DA==
|
||||
20170101 2 AgEHDAMIAgDeUy0E+fjxBA==
|
@@ -102,6 +102,13 @@ public class AggregatorUtil
   // Suppressed aggregator
   public static final byte SUPPRESSED_AGG_CACHE_TYPE_ID = 0x2D;
 
+  // HllSketch module in datasketches extension
+  public static final byte HLL_SKETCH_BUILD_CACHE_TYPE_ID = 0x2E;
+  public static final byte HLL_SKETCH_MERGE_CACHE_TYPE_ID = 0x2F;
+  public static final byte HLL_SKETCH_UNION_CACHE_TYPE_ID = 0x30;
+  public static final byte HLL_SKETCH_TO_STRING_CACHE_TYPE_ID = 0x31;
+  public static final byte HLL_SKETCH_TO_ESTIMATE_AND_BOUNDS_CACHE_TYPE_ID = 0x32;
+
   /**
    * returns the list of dependent postAggregators that should be calculated in order to calculate given postAgg
    *