Optimisations for LexicographicTopNs

initial review for perf optimizations for lexicographic TopNs

fix compilation

create map with proper size

review comment

review comment

review comments
This commit is contained in:
Nishant 2015-08-06 17:06:35 +05:30
parent 9dd18de1a5
commit b8d8a8da9e
4 changed files with 111 additions and 42 deletions

View File

@ -64,14 +64,14 @@ public class DimValHolder
public static class Builder public static class Builder
{ {
private Object topNMetricVal; private Object topNMetricVal;
private String dirName; private String dimName;
private Object dimValIndex; private Object dimValIndex;
private Map<String, Object> metricValues; private Map<String, Object> metricValues;
public Builder() public Builder()
{ {
topNMetricVal = null; topNMetricVal = null;
dirName = null; dimName = null;
dimValIndex = null; dimValIndex = null;
metricValues = null; metricValues = null;
} }
@ -82,9 +82,9 @@ public class DimValHolder
return this; return this;
} }
public Builder withDirName(String dirName) public Builder withDimName(String dimName)
{ {
this.dirName = dirName; this.dimName = dimName;
return this; return this;
} }
@ -102,7 +102,7 @@ public class DimValHolder
public DimValHolder build() public DimValHolder build()
{ {
return new DimValHolder(topNMetricVal, dirName, dimValIndex, metricValues); return new DimValHolder(topNMetricVal, dimName, dimValIndex, metricValues);
} }
} }
} }

View File

@ -41,6 +41,10 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
@Override @Override
public int compare(String s, String s2) public int compare(String s, String s2)
{ {
// Avoid conversion to bytes for equal references
if(s == s2){
return 0;
}
// null first // null first
if (s == null) { if (s == null) {
return -1; return -1;
@ -48,6 +52,7 @@ public class LexicographicTopNMetricSpec implements TopNMetricSpec
if (s2 == null) { if (s2 == null) {
return 1; return 1;
} }
return UnsignedBytes.lexicographicalComparator().compare( return UnsignedBytes.lexicographicalComparator().compare(
StringUtils.toUtf8(s), StringUtils.toUtf8(s),
StringUtils.toUtf8(s2) StringUtils.toUtf8(s2)

View File

@ -17,14 +17,16 @@
package io.druid.query.topn; package io.druid.query.topn;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.common.collect.MinMaxPriorityQueue;
import io.druid.query.Result; import io.druid.query.Result;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.DimensionSpec;
import java.util.Arrays;
import java.util.PriorityQueue;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -34,12 +36,15 @@ import java.util.Map;
*/ */
public class TopNLexicographicResultBuilder implements TopNResultBuilder public class TopNLexicographicResultBuilder implements TopNResultBuilder
{ {
private static final int LOOP_UNROLL_COUNT = 8;
private final DateTime timestamp; private final DateTime timestamp;
private final DimensionSpec dimSpec; private final DimensionSpec dimSpec;
private final String previousStop; private final String previousStop;
private final Comparator comparator; private final Comparator comparator;
private final List<AggregatorFactory> aggFactories; private final String[] aggFactoryNames;
private MinMaxPriorityQueue<DimValHolder> pQueue = null; private final PriorityQueue<DimValHolder> pQueue;
private final int threshold;
public TopNLexicographicResultBuilder( public TopNLexicographicResultBuilder(
DateTime timestamp, DateTime timestamp,
@ -54,9 +59,22 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
this.dimSpec = dimSpec; this.dimSpec = dimSpec;
this.previousStop = previousStop; this.previousStop = previousStop;
this.comparator = comparator; this.comparator = comparator;
this.aggFactories = aggFactories; this.aggFactoryNames = TopNQueryQueryToolChest.extractFactoryName(aggFactories);
this.threshold = threshold;
instantiatePQueue(threshold, comparator); this.pQueue = new PriorityQueue<>(
threshold + 1,
new Comparator<DimValHolder>()
{
@Override
public int compare(
DimValHolder o1,
DimValHolder o2
)
{
return comparator.compare(o2.getDimName(), o1.getDimName());
}
}
);
} }
@Override @Override
@ -66,16 +84,42 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
Object[] metricVals Object[] metricVals
) )
{ {
Map<String, Object> metricValues = Maps.newLinkedHashMap(); final Map<String, Object> metricValues = Maps.newHashMapWithExpectedSize(metricVals.length + 1);
if (comparator.compare(dimName, previousStop) > 0) { if (shouldAdd(dimName)) {
metricValues.put(dimSpec.getOutputName(), dimName); metricValues.put(dimSpec.getOutputName(), dimName);
Iterator<AggregatorFactory> aggsIter = aggFactories.iterator(); final int extra = metricVals.length % LOOP_UNROLL_COUNT;
for (Object metricVal : metricVals) { switch (extra) {
metricValues.put(aggsIter.next().getName(), metricVal); case 7:
metricValues.put(aggFactoryNames[6], metricVals[6]);
case 6:
metricValues.put(aggFactoryNames[5], metricVals[5]);
case 5:
metricValues.put(aggFactoryNames[4], metricVals[4]);
case 4:
metricValues.put(aggFactoryNames[3], metricVals[3]);
case 3:
metricValues.put(aggFactoryNames[2], metricVals[2]);
case 2:
metricValues.put(aggFactoryNames[1], metricVals[1]);
case 1:
metricValues.put(aggFactoryNames[0], metricVals[0]);
}
for (int i = extra; i < metricVals.length; i += LOOP_UNROLL_COUNT) {
metricValues.put(aggFactoryNames[i + 0], metricVals[i + 0]);
metricValues.put(aggFactoryNames[i + 1], metricVals[i + 1]);
metricValues.put(aggFactoryNames[i + 2], metricVals[i + 2]);
metricValues.put(aggFactoryNames[i + 3], metricVals[i + 3]);
metricValues.put(aggFactoryNames[i + 4], metricVals[i + 4]);
metricValues.put(aggFactoryNames[i + 5], metricVals[i + 5]);
metricValues.put(aggFactoryNames[i + 6], metricVals[i + 6]);
metricValues.put(aggFactoryNames[i + 7], metricVals[i + 7]);
} }
pQueue.add(new DimValHolder.Builder().withDirName(dimName).withMetricValues(metricValues).build()); pQueue.add(new DimValHolder.Builder().withDimName(dimName).withMetricValues(metricValues).build());
if (pQueue.size() > threshold) {
pQueue.poll();
}
} }
return this; return this;
@ -84,12 +128,18 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
@Override @Override
public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor) public TopNResultBuilder addEntry(DimensionAndMetricValueExtractor dimensionAndMetricValueExtractor)
{ {
pQueue.add( String dimensionValue = dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName());
new DimValHolder.Builder().withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
.build()
);
if (shouldAdd(dimensionValue)) {
pQueue.add(
new DimValHolder.Builder().withDimName(dimensionValue)
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
.build()
);
if (pQueue.size() > threshold) {
pQueue.poll();
}
}
return this; return this;
} }
@ -103,28 +153,43 @@ public class TopNLexicographicResultBuilder implements TopNResultBuilder
public Result<TopNResultValue> build() public Result<TopNResultValue> build()
{ {
// Pull out top aggregated values // Pull out top aggregated values
List<Map<String, Object>> values = new ArrayList<Map<String, Object>>(pQueue.size()); final DimValHolder[] holderValueArray = pQueue.toArray(new DimValHolder[0]);
while (!pQueue.isEmpty()) { Arrays.sort(
values.add(pQueue.remove().getMetricValues()); holderValueArray,
}
return new Result<TopNResultValue>(timestamp, new TopNResultValue(values));
}
private void instantiatePQueue(int threshold, final Comparator comparator)
{
this.pQueue = MinMaxPriorityQueue.orderedBy(
new Comparator<DimValHolder>() new Comparator<DimValHolder>()
{ {
@Override @Override
public int compare( public int compare(DimValHolder o1, DimValHolder o2)
DimValHolder o1,
DimValHolder o2
)
{ {
return comparator.compare(o1.getDimName(), o2.getDimName()); return comparator.compare(o1.getDimName(), o2.getDimName());
} }
} }
).maximumSize(threshold).create();
);
return new Result(
timestamp, new TopNResultValue(
Lists.transform(
Arrays.asList(holderValueArray),
new Function<DimValHolder, Object>()
{
@Override
public Object apply(DimValHolder dimValHolder)
{
return dimValHolder.getMetricValues();
}
}
)
)
);
} }
/**
 * Decides whether an entry for {@code dimName} is worth building and offering to {@code pQueue}.
 *
 * <p>The queue is ordered with the reversed comparator, so {@code pQueue.peek()} is the greatest
 * dimension name currently retained, and it is the entry evicted by {@code poll()} once the queue
 * grows past {@code threshold}. A candidate is therefore only useful when the queue still has room,
 * or when it sorts strictly before that current maximum.
 *
 * <p>The holders created by this builder carry only a dimension name and metric values, so the
 * comparison must use {@code getDimName()}; {@code getTopNMetricVal()} is never populated here.
 *
 * @param dimName the dimension value of the candidate entry
 * @return {@code true} if the candidate sorts after {@code previousStop} (when one is set) and
 *         would survive in the bounded queue
 */
private boolean shouldAdd(String dimName)
{
  // Only add if dimName is after previousStop
  if (previousStop != null && comparator.compare(dimName, previousStop) <= 0) {
    return false;
  }
  if (pQueue.size() < threshold) {
    return true;
  }
  // Queue is full: accept only candidates that sort before the current maximum, because anything
  // else would be polled straight back out. The isEmpty() guard covers a non-positive threshold,
  // where peek() would return null.
  return !pQueue.isEmpty() && comparator.compare(pQueue.peek().getDimName(), dimName) > 0;
}
} }

View File

@ -41,7 +41,6 @@ import java.util.PriorityQueue;
*/ */
public class TopNNumericResultBuilder implements TopNResultBuilder public class TopNNumericResultBuilder implements TopNResultBuilder
{ {
private final DateTime timestamp; private final DateTime timestamp;
private final DimensionSpec dimSpec; private final DimensionSpec dimSpec;
private final String metricName; private final String metricName;
@ -166,7 +165,7 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
if (shouldAdd(topNMetricVal)) { if (shouldAdd(topNMetricVal)) {
DimValHolder dimValHolder = new DimValHolder.Builder() DimValHolder dimValHolder = new DimValHolder.Builder()
.withTopNMetricVal(topNMetricVal) .withTopNMetricVal(topNMetricVal)
.withDirName(dimName) .withDimName(dimName)
.withDimValIndex(dimValIndex) .withDimValIndex(dimValIndex)
.withMetricValues(metricValues) .withMetricValues(metricValues)
.build(); .build();
@ -195,7 +194,7 @@ public class TopNNumericResultBuilder implements TopNResultBuilder
if (shouldAdd(dimValue)) { if (shouldAdd(dimValue)) {
final DimValHolder valHolder = new DimValHolder.Builder() final DimValHolder valHolder = new DimValHolder.Builder()
.withTopNMetricVal(dimValue) .withTopNMetricVal(dimValue)
.withDirName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName())) .withDimName(dimensionAndMetricValueExtractor.getStringDimensionValue(dimSpec.getOutputName()))
.withMetricValues(dimensionAndMetricValueExtractor.getBaseObject()) .withMetricValues(dimensionAndMetricValueExtractor.getBaseObject())
.build(); .build();
pQueue.add(valHolder); pQueue.add(valHolder);