mirror of https://github.com/apache/druid.git
Optimisations for LexicographicTopNs
Initial review for perf optimizations for lexicographic TopNs; fix compilation; create map with proper size; review comment; review comment; review comments.
This commit is contained in:
parent
9dd18de1a5
commit
b8d8a8da9e
|
@ -64,14 +64,14 @@ public class DimValHolder
|
||||||
public static class Builder
|
public static class Builder
|
||||||
{
|
{
|
||||||
private Object topNMetricVal;
|
private Object topNMetricVal;
|
||||||
private String dirName;
|
private String dimName;
|
||||||
private Object dimValIndex;
|
private Object dimValIndex;
|
||||||
private Map<String, Object> metricValues;
|
private Map<String, Object> metricValues;
|
||||||
|
|
||||||
public Builder()
|
public Builder()
|
||||||
{
|
{
|
||||||
topNMetricVal = null;
|
topNMetricVal = null;
|
||||||
dirName = null;
|
dimName = null;
|
||||||
dimValIndex = null;
|
dimValIndex = null;
|
||||||
metricValues = null;
|
metricValues = null;
|
||||||
}
|
}
|
||||||
|
@ -82,9 +82,9 @@ public class DimValHolder
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder withDirName(String dirName)
|
public Builder withDimName(String dimName)
|
||||||
{
|
{
|
||||||
this.dirName = dirName;
|
this.dimName = dimName;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,7 +102,7 @@ public class DimValHolder
|
||||||
|
|
||||||
public DimValHolder build()
|
public DimValHolder build()
|
||||||
{
|
{
|
||||||
return new DimValHolder(topNMetricVal, dirName, dimValIndex, metricValues);
|
return new DimValHolder(topNMetricVal, dimName, dimValIndex, metricValues);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,10 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
@Override
|
@Override
|
||||||
public int compare(String s, String s2)
|
public int compare(String s, String s2)
|
||||||
{
|
{
|
||||||
|
// Avoid conversion to bytes for equal references
|
||||||
|
if(s == s2){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
// null first
|
// null first
|
||||||
if (s == null) {
|
if (s == null) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -48,6 +52,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
|
||||||
if (s2 == null) {
|
if (s2 == null) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return UnsignedBytes.lexicographicalComparator().compare(
|
return UnsignedBytes.lexicographicalComparator().compare(
|
||||||
StringUtils.toUtf8(s),
|
StringUtils.toUtf8(s),
|
||||||
StringUtils.toUtf8(s2)
|
StringUtils.toUtf8(s2)
|
||||||
|
|
|
@ -17,14 +17,16 @@
|
||||||
|
|
||||||
package io.druid.query.topn;
|
package io.druid.query.topn;
|
||||||
|
|
||||||
|
import com.google.common.base.Function;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.MinMaxPriorityQueue;
|
|
||||||
import io.druid.query.Result;
|
import io.druid.query.Result;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.dimension.DimensionSpec;
|
import io.druid.query.dimension.DimensionSpec;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -34,12 +36,15 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
{
|
{
|
||||||
|
private static final int LOOP_UNROLL_COUNT = 8;
|
||||||
|
|
||||||
private final DateTime timestamp;
|
private final DateTime timestamp;
|
||||||
private final DimensionSpec dimSpec;
|
private final DimensionSpec dimSpec;
|
||||||
private final String previousStop;
|
private final String previousStop;
|
||||||
private final Comparator comparator;
|
private final Comparator comparator;
|
||||||
private final List<AggregatorFactory> aggFactories;
|
private final String[] aggFactoryNames;
|
||||||
private MinMaxPriorityQueue<DimValHolder> pQueue = null;
|
private final PriorityQueue<DimValHolder> pQueue;
|
||||||
|
private final int threshold;
|
||||||
|
|
||||||
public TopNLexicographicResultBuilder(
|
public TopNLexicographicResultBuilder(
|
||||||
DateTime timestamp,
|
DateTime timestamp,
|
||||||
|
@ -54,9 +59,22 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
this.dimSpec = dimSpec;
|
this.dimSpec = dimSpec;
|
||||||
this.previousStop = previousStop;
|
this.previousStop = previousStop;
|
||||||
this.comparator = comparator;
|
this.comparator = comparator;
|
||||||
this.aggFactories = aggFactories;
|
this.aggFactoryNames = TopNQueryQueryToolChest.extractFactoryName(aggFactories);
|
||||||
|
this.threshold = threshold;
|
||||||
instantiatePQueue(threshold, comparator);
|
this.pQueue = new PriorityQueue<>(
|
||||||
|
threshold + 1,
|
||||||
|
new Comparator<DimValHolder>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public int compare(
|
||||||
|
DimValHolder o1,
|
||||||
|
DimValHolder o2
|
||||||
|
)
|
||||||
|
{
|
||||||
|
return comparator.compare(o2.getDimName(), o1.getDimName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -66,16 +84,42 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
Object[] metricVals
|
Object[] metricVals
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Map<String, Object> metricValues = Maps.newLinkedHashMap();
|
final Map<String, Object> metricValues = Maps.newHashMapWithExpectedSize(metricVals.length + 1);
|
||||||
|
|
||||||
if (comparator.compare(dimName, previousStop) > 0) {
|
if (shouldAdd(dimName)) {
|
||||||
metricValues.put(dimSpec.getOutputName(), dimName);
|
metricValues.put(dimSpec.getOutputName(), dimName);
|
||||||
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator();
|
final int extra = metricVals.length % LOOP_UNROLL_COUNT;
|
||||||
for (Object metricVal : metricVals) {
|
switch (extra) {
|
||||||
metricValues.put(aggsIter.next().getName(), metricVal);
|
case 7:
|
||||||
|
metricValues.put(aggFactoryNames[6], metricVals[6]);
|
||||||
|
case 6:
|
||||||
|
metricValues.put(aggFactoryNames[5], metricVals[5]);
|
||||||
|
case 5:
|
||||||
|
metricValues.put(aggFactoryNames[4], metricVals[4]);
|
||||||
|
case 4:
|
||||||
|
metricValues.put(aggFactoryNames[3], metricVals[3]);
|
||||||
|
case 3:
|
||||||
|
metricValues.put(aggFactoryNames[2], metricVals[2]);
|
||||||
|
case 2:
|
||||||
|
metricValues.put(aggFactoryNames[1], metricVals[1]);
|
||||||
|
case 1:
|
||||||
|
metricValues.put(aggFactoryNames[0], metricVals[0]);
|
||||||
|
}
|
||||||
|
for (int i = extra; i < metricVals.length; i += LOOP_UNROLL_COUNT) {
|
||||||
|
metricValues.put(aggFactoryNames[i + 0], metricVals[i + 0]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 1], metricVals[i + 1]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 2], metricVals[i + 2]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 3], metricVals[i + 3]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 4], metricVals[i + 4]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 5], metricVals[i + 5]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 6], metricVals[i + 6]);
|
||||||
|
metricValues.put(aggFactoryNames[i + 7], metricVals[i + 7]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pQueue.add(new DimValHolder.Builder().withDirName(dimName).withMetricValues(metricValues).build());
|
pQueue.add(new DimValHolder.Builder().withDimName(dimName).withMetricValues(metricValues).build());
|
||||||
|
if (pQueue.size() > threshold) {
|
||||||
|
pQueue.poll();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
|
@ -84,12 +128,18 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
@Override
|
@Override
|
||||||
public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor)
|
public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor)
|
||||||
{
|
{
|
||||||
pQueue.add(
|
String dimensionValue = dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName());
|
||||||
new DimValHolder.Builder().withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
|
|
||||||
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
|
|
||||||
.build()
|
|
||||||
);
|
|
||||||
|
|
||||||
|
if (shouldAdd(dimensionValue)) {
|
||||||
|
pQueue.add(
|
||||||
|
new DimValHolder.Builder().withDimName(dimensionValue)
|
||||||
|
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
if (pQueue.size() > threshold) {
|
||||||
|
pQueue.poll();
|
||||||
|
}
|
||||||
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,28 +153,43 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
|
||||||
public Result<TopNResultValue> build()
|
public Result<TopNResultValue> build()
|
||||||
{
|
{
|
||||||
// Pull out top aggregated values
|
// Pull out top aggregated values
|
||||||
List<Map<String, Object>> values = new ArrayList<Map<String, Object>>(pQueue.size());
|
final DimValHolder[] holderValueArray = pQueue.toArray(new DimValHolder[0]);
|
||||||
while (!pQueue.isEmpty()) {
|
Arrays.sort(
|
||||||
values.add(pQueue.remove().getMetricValues());
|
holderValueArray,
|
||||||
}
|
|
||||||
|
|
||||||
return new Result<TopNResultValue>(timestamp, new TopNResultValue(values));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void instantiatePQueue(int threshold, final Comparator comparator)
|
|
||||||
{
|
|
||||||
this.pQueue = MinMaxPriorityQueue.orderedBy(
|
|
||||||
new Comparator<DimValHolder>()
|
new Comparator<DimValHolder>()
|
||||||
{
|
{
|
||||||
@Override
|
@Override
|
||||||
public int compare(
|
public int compare(DimValHolder o1, DimValHolder o2)
|
||||||
DimValHolder o1,
|
|
||||||
DimValHolder o2
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
return comparator.compare(o1.getDimName(), o2.getDimName());
|
return comparator.compare(o1.getDimName(), o2.getDimName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
).maximumSize(threshold).create();
|
|
||||||
|
);
|
||||||
|
return new Result(
|
||||||
|
timestamp, new TopNResultValue(
|
||||||
|
Lists.transform(
|
||||||
|
Arrays.asList(holderValueArray),
|
||||||
|
new Function<DimValHolder, Object>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Object apply(DimValHolder dimValHolder)
|
||||||
|
{
|
||||||
|
return dimValHolder.getMetricValues();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean shouldAdd(String dimName)
|
||||||
|
{
|
||||||
|
final boolean belowThreshold = pQueue.size() < threshold;
|
||||||
|
final boolean belowMax = belowThreshold
|
||||||
|
|| comparator.compare(pQueue.peek().getTopNMetricVal(), dimName) < 0;
|
||||||
|
// Only add if dimName is after previousStop
|
||||||
|
return belowMax && (previousStop == null || comparator.compare(dimName, previousStop) > 0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,6 @@ import java.util.PriorityQueue;
|
||||||
*/
|
*/
|
||||||
public class TopNNumericResultBuilder implements TopNResultBuilder
|
public class TopNNumericResultBuilder implements TopNResultBuilder
|
||||||
{
|
{
|
||||||
|
|
||||||
private final DateTime timestamp;
|
private final DateTime timestamp;
|
||||||
private final DimensionSpec dimSpec;
|
private final DimensionSpec dimSpec;
|
||||||
private final String metricName;
|
private final String metricName;
|
||||||
|
@ -166,7 +165,7 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
|
||||||
if (shouldAdd(topNMetricVal)) {
|
if (shouldAdd(topNMetricVal)) {
|
||||||
DimValHolder dimValHolder = new DimValHolder.Builder()
|
DimValHolder dimValHolder = new DimValHolder.Builder()
|
||||||
.withTopNMetricVal(topNMetricVal)
|
.withTopNMetricVal(topNMetricVal)
|
||||||
.withDirName(dimName)
|
.withDimName(dimName)
|
||||||
.withDimValIndex(dimValIndex)
|
.withDimValIndex(dimValIndex)
|
||||||
.withMetricValues(metricValues)
|
.withMetricValues(metricValues)
|
||||||
.build();
|
.build();
|
||||||
|
@ -195,7 +194,7 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
|
||||||
if (shouldAdd(dimValue)) {
|
if (shouldAdd(dimValue)) {
|
||||||
final DimValHolder valHolder = new DimValHolder.Builder()
|
final DimValHolder valHolder = new DimValHolder.Builder()
|
||||||
.withTopNMetricVal(dimValue)
|
.withTopNMetricVal(dimValue)
|
||||||
.withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
|
.withDimName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
|
||||||
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
|
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
|
||||||
.build();
|
.build();
|
||||||
pQueue.add(valHolder);
|
pQueue.add(valHolder);
|
||||||
|
|
Loading…
Reference in New Issue