diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java
index 14ce9694b78..dcb4b0519ab 100644
--- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java
@@ -26,8 +26,8 @@ import io.druid.query.ColumnSelectorPlus;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.AggregatorUtil;
 import io.druid.query.aggregation.PostAggregator;
-import io.druid.segment.Capabilities;
 import io.druid.segment.Cursor;
+import io.druid.segment.StorageAdapter;
 
 import javax.annotation.Nullable;
 import java.nio.ByteBuffer;
@@ -39,17 +39,17 @@ import java.util.List;
  */
 public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm
 {
-  private final Capabilities capabilities;
+  private final StorageAdapter storageAdapter;
   private final TopNQuery query;
   private final NonBlockingPool bufferPool;
 
   public AggregateTopNMetricFirstAlgorithm(
-      Capabilities capabilities,
+      StorageAdapter storageAdapter,
       TopNQuery query,
       NonBlockingPool bufferPool
   )
   {
-    this.capabilities = capabilities;
+    this.storageAdapter = storageAdapter;
     this.query = query;
     this.bufferPool = bufferPool;
   }
@@ -91,7 +91,7 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm
diff --git a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java
-    protected Pair<Integer, Integer> computeStartEnd(int cardinality)
+    @VisibleForTesting
+    public Pair<Integer, Integer> computeStartEnd(int cardinality)
     {
       int startIndex = ignoreFirstN;
@@ -305,7 +315,9 @@ public abstract class BaseTopNAlgorithm
-      if (ignoreAfterThreshold && query.getDimensionsFilter() == null) {
+      if (ignoreAfterThreshold &&
+          query.getDimensionsFilter() == null &&
+          query.getIntervals().stream().anyMatch(interval -> interval.contains(storageAdapter.getInterval()))) {
         endIndex = Math.min(endIndex, startIndex + query.getThreshold());
       }
diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
index 06cc5df1b3e..47373721c18 100644
--- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
@@ -23,24 +23,25 @@ import com.google.common.base.Function;
 import io.druid.query.ColumnSelectorPlus;
 import io.druid.query.aggregation.Aggregator;
 import io.druid.query.topn.types.TopNColumnSelectorStrategy;
-import io.druid.segment.Capabilities;
 import io.druid.segment.Cursor;
+import io.druid.segment.StorageAdapter;
 
 import java.util.Map;
 
 /**
  * This has to be its own strategy because the pooled topn algorithm assumes each index is unique, and cannot handle multiple index numerals referencing the same dimension value.
  */
-public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm, TopNParams>
+public class DimExtractionTopNAlgorithm
+    extends BaseTopNAlgorithm, TopNParams>
 {
   private final TopNQuery query;
 
   public DimExtractionTopNAlgorithm(
-      Capabilities capabilities,
+      StorageAdapter storageAdapter,
       TopNQuery query
   )
   {
-    super(capabilities);
+    super(storageAdapter);
     this.query = query;
   }
 
@@ -65,7 +66,7 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm
     ColumnSelectorPlus<TopNColumnSelectorStrategy> selectorPlus = params.getSelectorPlus();
-    return selectorPlus.getColumnSelectorStrategy().getDimExtractionRowSelector(query, params, capabilities);
+    return selectorPlus.getColumnSelectorStrategy().getDimExtractionRowSelector(query, params, storageAdapter);
   }
 
   @Override
diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
index d1bbf954d95..c191f4a6d7a 100644
--- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
@@ -33,10 +33,10 @@ import io.druid.query.aggregation.SimpleDoubleBufferAggregator;
 import io.druid.query.monomorphicprocessing.SpecializationService;
 import io.druid.query.monomorphicprocessing.SpecializationState;
 import io.druid.query.monomorphicprocessing.StringRuntimeShape;
-import io.druid.segment.Capabilities;
 import io.druid.segment.Cursor;
 import io.druid.segment.DimensionSelector;
 import io.druid.segment.FilteredOffset;
+import io.druid.segment.StorageAdapter;
 import io.druid.segment.column.ValueType;
 import io.druid.segment.data.IndexedInts;
 import io.druid.segment.data.Offset;
@@ -64,7 +64,9 @@ public class PooledTopNAlgorithm
   private static boolean specializeHistoricalSingleValueDimSelector1SimpleDoubleAggPooledTopN =
       !Boolean.getBoolean("dontSpecializeHistoricalSingleValueDimSelector1SimpleDoubleAggPooledTopN");
 
-  /** See TopNQueryRunnerTest */
+  /**
+   * See TopNQueryRunnerTest
+   */
   @VisibleForTesting
   static void setSpecializeGeneric1AggPooledTopN(boolean value)
   {
@@ -116,6 +118,7 @@ public class PooledTopNAlgorithm
   }
 
   private static final List specializedScanAndAggregateImplementations = new ArrayList<>();
+
   static {
     computeSpecializedScanAndAggregateImplementations();
   }
@@ -197,12 +200,12 @@ public class PooledTopNAlgorithm
   private static final int AGG_UNROLL_COUNT = 8; // Must be able to fit loop below
 
   public PooledTopNAlgorithm(
-      Capabilities capabilities,
+      StorageAdapter storageAdapter,
       TopNQuery query,
       NonBlockingPool bufferPool
   )
   {
-    super(capabilities);
+    super(storageAdapter);
     this.query = query;
     this.bufferPool = bufferPool;
   }
@@ -226,7 +229,7 @@ public class PooledTopNAlgorithm
     final TopNMetricSpecBuilder arrayProvider = new BaseArrayProvider(
         dimSelector,
         query,
-        capabilities
+        storageAdapter
     )
     {
       private final int[] positions = new int[cardinality];
diff --git a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java
index 6b5bed59a72..791c6544bfc 100644
--- a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java
@@ -20,11 +20,11 @@ package io.druid.query.topn;
 
 import com.google.common.collect.Maps;
-import io.druid.query.aggregation.Aggregator;
 import io.druid.query.ColumnSelectorPlus;
-import io.druid.segment.Capabilities;
+import io.druid.query.aggregation.Aggregator;
 import io.druid.segment.Cursor;
 import io.druid.segment.DimensionSelector;
+import io.druid.segment.StorageAdapter;
 
 import java.util.Map;
 
@@ -33,9 +33,9 @@ public class TimeExtractionTopNAlgorithm extends BaseTopNAlgorithm
diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java
     for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
       ValueType type = TYPE_MAP.get(dimSchema.getValueType());
       String dimName = dimSchema.getName();
-      ColumnCapabilitiesImpl capabilities = makeCapabilitesFromValueType(type);
+      ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type);
       capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
 
       if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
@@ -875,7 +875,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp
     }
   }
 
-  private ColumnCapabilitiesImpl makeCapabilitesFromValueType(ValueType type)
+  private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
   {
     ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
     capabilities.setDictionaryEncoded(type == ValueType.STRING);
diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java
index 218d6fb6597..35a375c72e5 100644
--- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java
+++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java
@@ -142,6 +142,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
     return indexer.getMaxValue();
   }
 
+  @Override
   public Capabilities getCapabilities()
   {
diff --git a/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java b/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java
index c64e686fcb6..214be16da9f 100644
--- a/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java
+++ b/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java
@@ -20,7 +20,7 @@ package io.druid.query.topn;
 
 import io.druid.collections.ResourceHolder;
-import io.druid.segment.Capabilities;
+import io.druid.segment.StorageAdapter;
 import org.easymock.EasyMock;
 import org.junit.Test;
 
@@ -31,14 +31,14 @@ public class PooledTopNAlgorithmTest
   @Test
   public void testCleanupWithNullParams()
   {
-    PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(Capabilities.builder().build(), null, null);
+    PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(EasyMock.mock(StorageAdapter.class), null, null);
     pooledTopNAlgorithm.cleanup(null);
   }
 
   @Test
   public void cleanup()
   {
-    PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(Capabilities.builder().build(), null, null);
+    PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(EasyMock.mock(StorageAdapter.class), null, null);
     PooledTopNAlgorithm.PooledTopNParams params = EasyMock.createMock(PooledTopNAlgorithm.PooledTopNParams.class);
     ResourceHolder resourceHolder = EasyMock.createMock(ResourceHolder.class);
     EasyMock.expect(params.getResultsBufHolder()).andReturn(resourceHolder).times(1);
diff --git a/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java b/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java
new file mode 100644
index 00000000000..14305ea775a
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java
@@ -0,0 +1,480 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.topn;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import io.druid.java.util.common.DateTimes;
+import io.druid.java.util.common.Intervals;
+import io.druid.java.util.common.Pair;
+import io.druid.java.util.common.granularity.Granularity;
+import io.druid.java.util.common.guava.Sequence;
+import io.druid.query.QueryMetrics;
+import io.druid.query.aggregation.AggregatorFactory;
+import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
+import io.druid.query.aggregation.DoubleMinAggregatorFactory;
+import io.druid.query.aggregation.PostAggregator;
+import io.druid.query.filter.Filter;
+import io.druid.query.filter.ValueMatcher;
+import io.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import io.druid.segment.Capabilities;
+import io.druid.segment.Cursor;
+import io.druid.segment.DimensionSelector;
+import io.druid.segment.IdLookup;
+import io.druid.segment.Metadata;
+import io.druid.segment.StorageAdapter;
+import io.druid.segment.VirtualColumns;
+import io.druid.segment.column.ColumnCapabilities;
+import io.druid.segment.data.Indexed;
+import io.druid.segment.data.IndexedInts;
+import org.joda.time.DateTime;
+import org.joda.time.Interval;
+import org.junit.Assert;
+import org.junit.Test;
+
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+import static io.druid.query.QueryRunnerTestHelper.addRowsIndexConstant;
+import static io.druid.query.QueryRunnerTestHelper.allGran;
+import static io.druid.query.QueryRunnerTestHelper.commonDoubleAggregators;
+import static io.druid.query.QueryRunnerTestHelper.dataSource;
+import static io.druid.query.QueryRunnerTestHelper.indexMetric;
+import static io.druid.query.QueryRunnerTestHelper.marketDimension;
+import static io.druid.query.QueryRunnerTestHelper.qualityDimension;
+
+public class TopNMetricSpecOptimizationsTest
+{
+  @Test
+  public void testShouldOptimizeLexicographic()
+  {
+    // query interval is greater than segment interval, no filters, can ignoreAfterThreshold
+    int cardinality = 1234;
+    int threshold = 4;
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(dataSource)
+        .granularity(allGran)
+        .dimension(marketDimension)
+        .metric(indexMetric)
+        .threshold(threshold)
+        .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z")
+        .aggregators(
+            Lists.newArrayList(
+                Iterables.concat(
+                    commonDoubleAggregators,
+                    Lists.newArrayList(
+                        new DoubleMaxAggregatorFactory("maxIndex", "index"),
+                        new DoubleMinAggregatorFactory("minIndex", "index")
+                    )
+                )
+            )
+        )
+        .postAggregators(Arrays.asList(addRowsIndexConstant))
+        .build();
+
+    StorageAdapter adapter =
+        makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality);
+    DimensionSelector dimSelector = makeFakeDimSelector(cardinality);
+
+    BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider(
+        dimSelector,
+        query,
+        cardinality,
+        adapter
+    );
+
+    arrayProviderToTest.ignoreAfterThreshold();
+    Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
+    Assert.assertEquals(new Integer(0), thePair.lhs);
+    Assert.assertEquals(new Integer(threshold), thePair.rhs);
+  }
+
+  @Test
+  public void testAlsoShouldOptimizeLexicographic()
+  {
+    // query interval is same as segment interval, no filters, can ignoreAfterThreshold
+    int cardinality = 1234;
+    int threshold = 4;
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(dataSource)
+        .granularity(allGran)
+        .dimension(marketDimension)
+        .metric(indexMetric)
+        .threshold(threshold)
+        .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z")
+        .aggregators(
+            Lists.newArrayList(
+                Iterables.concat(
+                    commonDoubleAggregators,
+                    Lists.newArrayList(
+                        new DoubleMaxAggregatorFactory("maxIndex", "index"),
+                        new DoubleMinAggregatorFactory("minIndex", "index")
+                    )
+                )
+            )
+        )
+        .postAggregators(Arrays.asList(addRowsIndexConstant))
+        .build();
+
+    StorageAdapter adapter =
+        makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality);
+    DimensionSelector dimSelector = makeFakeDimSelector(cardinality);
+
+
+    BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider(
+        dimSelector,
+        query,
+        cardinality,
+        adapter
+    );
+
+    arrayProviderToTest.ignoreAfterThreshold();
+    Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
+    Assert.assertEquals(new Integer(0), thePair.lhs);
+    Assert.assertEquals(new Integer(threshold), thePair.rhs);
+  }
+
+  @Test
+  public void testShouldNotOptimizeLexicographic()
+  {
+    // query interval is smaller than segment interval, no filters, can ignoreAfterThreshold
+    int cardinality = 1234;
+    int threshold = 4;
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(dataSource)
+        .granularity(allGran)
+        .dimension(marketDimension)
+        .metric(indexMetric)
+        .threshold(threshold)
+        .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z")
+        .aggregators(
+            Lists.newArrayList(
+                Iterables.concat(
+                    commonDoubleAggregators,
+                    Lists.newArrayList(
+                        new DoubleMaxAggregatorFactory("maxIndex", "index"),
+                        new DoubleMinAggregatorFactory("minIndex", "index")
+                    )
+                )
+            )
+        )
+        .postAggregators(Arrays.asList(addRowsIndexConstant))
+        .build();
+
+    StorageAdapter adapter =
+        makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-31T00:00:00Z", cardinality);
+    DimensionSelector dimSelector = makeFakeDimSelector(cardinality);
+
+
+    BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider(
+        dimSelector,
+        query,
+        cardinality,
+        adapter
+    );
+
+    arrayProviderToTest.ignoreAfterThreshold();
+    Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality);
+    Assert.assertEquals(new Integer(0), thePair.lhs);
+    Assert.assertEquals(new Integer(cardinality), thePair.rhs);
+  }
+
+  @Test
+  public void testAlsoShouldNotOptimizeLexicographic()
+  {
+    // query interval is larger than segment interval, but has filters, can ignoreAfterThreshold
+    int cardinality = 1234;
+    int threshold = 4;
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(dataSource)
+        .granularity(allGran)
+        .dimension(marketDimension)
+        .filters(qualityDimension, "entertainment")
+        .metric(indexMetric)
+        .threshold(threshold)
.intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z") + .aggregators( + Lists.newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + arrayProviderToTest.ignoreAfterThreshold(); + Pair thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(cardinality), thePair.rhs); + } + + @Test + public void testAgainShouldNotOptimizeLexicographic() + { + // query interval is larger than segment interval, no filters, can NOT ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z") + .aggregators( + Lists.newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.asList(addRowsIndexConstant)) + .build(); + + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + Pair thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(cardinality), thePair.rhs); + } + + private StorageAdapter makeFakeStorageAdapter(String start, String end, int cardinality) + { + StorageAdapter adapter = new StorageAdapter() + { + @Override + public Interval getInterval() + { + return Intervals.of(start + "/" + end); + } + + @Override + public int getDimensionCardinality(String column) + { + return cardinality; + } + + @Override + public DateTime getMinTime() + { + return DateTimes.of(start); + } + + + @Override + public DateTime getMaxTime() + { + return DateTimes.of(end); + } + + // stubs below this line not important for tests + @Override + public String getSegmentIdentifier() + { + return null; + } + + + @Override + public Indexed getAvailableDimensions() + { + return null; + } + + @Override + public Iterable getAvailableMetrics() + { + return null; + } + + @Nullable + @Override + public Comparable getMinValue(String column) + { + return null; + } + + @Nullable + @Override + public Comparable getMaxValue(String column) + { + return null; + } + + @Override + public Capabilities getCapabilities() + { + return Capabilities.builder().dimensionValuesSorted(true).build(); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return null; + } + + @Nullable + @Override + public String getColumnTypeName(String column) + { + return null; + } + + 
+      @Override
+      public int getNumRows()
+      {
+        return 0;
+      }
+
+      @Override
+      public DateTime getMaxIngestedEventTime()
+      {
+        return null;
+      }
+
+      @Override
+      public Metadata getMetadata()
+      {
+        return null;
+      }
+
+      @Override
+      public Sequence<Cursor> makeCursors(
+          @Nullable Filter filter,
+          Interval interval,
+          VirtualColumns virtualColumns,
+          Granularity gran,
+          boolean descending,
+          @Nullable QueryMetrics queryMetrics
+      )
+      {
+        return null;
+      }
+    };
+
+    return adapter;
+  }
+
+  private DimensionSelector makeFakeDimSelector(int cardinality)
+  {
+
+    DimensionSelector dimSelector = new DimensionSelector()
+    {
+      @Override
+      public int getValueCardinality()
+      {
+        return cardinality;
+      }
+
+      // stubs below this line not important for tests
+      @Override
+      public IndexedInts getRow()
+      {
+        return null;
+      }
+
+      @Override
+      public ValueMatcher makeValueMatcher(@Nullable String value)
+      {
+        return null;
+      }
+
+      @Override
+      public ValueMatcher makeValueMatcher(Predicate<String> predicate)
+      {
+        return null;
+      }
+
+      @Nullable
+      @Override
+      public String lookupName(int id)
+      {
+        return null;
+      }
+
+      @Override
+      public boolean nameLookupPossibleInAdvance()
+      {
+        return false;
+      }
+
+      @Nullable
+      @Override
+      public IdLookup idLookup()
+      {
+        return null;
+      }
+
+      @Override
+      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+      {
+
+      }
+
+      @Nullable
+      @Override
+      public Object getObject()
+      {
+        return null;
+      }
+
+      @Override
+      public Class classOfObject()
+      {
+        return null;
+      }
+    };
+
+    return dimSelector;
+  }
+}
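
The behavioral change the new TopNMetricSpecOptimizationsTest pins down is the extra guard in computeStartEnd: the ignoreAfterThreshold shortcut is now applied only when the query has no dimension filter and at least one query interval fully contains the segment interval reported by the StorageAdapter. Below is a minimal standalone sketch of that guard, assuming plain Joda-Time; the class and parameter names (LexicographicThresholdSketch, clampEndIndex, queryIntervals, segmentInterval) are hypothetical stand-ins for the real query and adapter objects, not the Druid implementation itself.

import java.util.Arrays;
import java.util.List;
import org.joda.time.Interval;

// Standalone sketch only: restates the condition added to computeStartEnd()
// with plain parameters instead of the TopNQuery / StorageAdapter objects.
final class LexicographicThresholdSketch
{
  static int clampEndIndex(
      boolean ignoreAfterThreshold,
      boolean hasDimensionFilter,   // stand-in for query.getDimensionsFilter() != null
      List<Interval> queryIntervals, // stand-in for query.getIntervals()
      Interval segmentInterval,      // stand-in for storageAdapter.getInterval()
      int startIndex,
      int threshold,
      int cardinality
  )
  {
    int endIndex = cardinality;
    // The shortcut is only safe when every row of the segment is in scope:
    // no dimension filter, and some query interval covers the whole segment.
    boolean segmentFullyCovered =
        queryIntervals.stream().anyMatch(interval -> interval.contains(segmentInterval));
    if (ignoreAfterThreshold && !hasDimensionFilter && segmentFullyCovered) {
      endIndex = Math.min(endIndex, startIndex + threshold);
    }
    return endIndex;
  }

  public static void main(String[] args)
  {
    Interval hour = Interval.parse("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z");
    Interval day = Interval.parse("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z");

    // Query covers the whole segment: end index clamps to the threshold (prints 4).
    System.out.println(clampEndIndex(true, false, Arrays.asList(day), hour, 0, 4, 1234));

    // Query interval smaller than the segment: no clamp, full cardinality (prints 1234).
    System.out.println(clampEndIndex(true, false, Arrays.asList(hour), day, 0, 4, 1234));
  }
}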