mirror of https://github.com/apache/druid.git
Fix FilteredAggregators at ingestion time and in groupBy v2 nested queries. (#3312)
The common theme between the two is they both create "fake" DimensionSelectors that work on top of Rows. They both do it because there isn't really any dictionary for the underlying Rows, they're just a stream of data. The fix for both is to allow a DimensionSelector to tell callers that it has no dictionary by returning CARDINALITY_UNKNOWN from getValueCardinality. The callers, in turn, can avoid using it in ways that assume it has a dictionary. Fixes #3311.
This commit is contained in:
parent
ae3e0015b6
commit
0299ac73b8
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Strings;
|
||||
import com.metamx.common.UOE;
|
||||
import io.druid.query.dimension.DefaultDimensionSpec;
|
||||
import io.druid.query.filter.DimFilter;
|
||||
import io.druid.query.filter.DruidLongPredicate;
|
||||
|
@ -43,6 +42,7 @@ import java.nio.ByteBuffer;
|
|||
import java.util.BitSet;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class FilteredAggregatorFactory extends AggregatorFactory
|
||||
{
|
||||
|
@ -246,27 +246,55 @@ public class FilteredAggregatorFactory extends AggregatorFactory
|
|||
return new BooleanValueMatcher(valueString == null);
|
||||
}
|
||||
|
||||
final int valueId = selector.lookupId(valueString);
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
public boolean matches()
|
||||
final int cardinality = selector.getValueCardinality();
|
||||
|
||||
if (cardinality >= 0) {
|
||||
// Dictionary-encoded dimension. Compare by id instead of by value to save time.
|
||||
final int valueId = selector.lookupId(valueString);
|
||||
|
||||
return new ValueMatcher()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return valueString == null;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (row.get(i) == valueId) {
|
||||
return true;
|
||||
@Override
|
||||
public boolean matches()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return valueString == null;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (row.get(i) == valueId) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
} else {
|
||||
// Not dictionary-encoded. Skip the optimization.
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
public boolean matches()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return valueString == null;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (Objects.equals(selector.lookupName(row.get(i)), valueString)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
public ValueMatcher makeValueMatcher(final String dimension, final DruidPredicateFactory predicateFactory)
|
||||
|
@ -294,35 +322,60 @@ public class FilteredAggregatorFactory extends AggregatorFactory
|
|||
return new BooleanValueMatcher(doesMatchNull);
|
||||
}
|
||||
|
||||
// Check every value in the dimension, as a String.
|
||||
final int cardinality = selector.getValueCardinality();
|
||||
final BitSet valueIds = new BitSet(cardinality);
|
||||
for (int i = 0; i < cardinality; i++) {
|
||||
if (predicate.apply(selector.lookupName(i))) {
|
||||
valueIds.set(i);
|
||||
}
|
||||
}
|
||||
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
public boolean matches()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return doesMatchNull;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (valueIds.get(row.get(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
if (cardinality >= 0) {
|
||||
// Dictionary-encoded dimension. Check every value; build a bitset of matching ids.
|
||||
final BitSet valueIds = new BitSet(cardinality);
|
||||
for (int i = 0; i < cardinality; i++) {
|
||||
if (predicate.apply(selector.lookupName(i))) {
|
||||
valueIds.set(i);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
public boolean matches()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return doesMatchNull;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (valueIds.get(row.get(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// Not dictionary-encoded. Skip the optimization.
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
public boolean matches()
|
||||
{
|
||||
final IndexedInts row = selector.getRow();
|
||||
final int size = row.size();
|
||||
if (size == 0) {
|
||||
// null should match empty rows in multi-value columns
|
||||
return doesMatchNull;
|
||||
} else {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (predicate.apply(selector.lookupName(row.get(i)))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private ValueMatcher makeLongValueMatcher(String dimension, DruidLongPredicate predicate)
|
||||
|
|
|
@ -74,7 +74,10 @@ public class ListFilteredDimensionSpec extends BaseFilteredDimensionSpec
|
|||
return selector;
|
||||
}
|
||||
|
||||
int selectorCardinality = selector.getValueCardinality();
|
||||
final int selectorCardinality = selector.getValueCardinality();
|
||||
if (selectorCardinality < 0) {
|
||||
throw new UnsupportedOperationException("Cannot decorate a selector with no dictionary");
|
||||
}
|
||||
int cardinality = isWhitelist ? values.size() : selectorCardinality - values.size();
|
||||
|
||||
int count = 0;
|
||||
|
|
|
@ -68,7 +68,12 @@ public class RegexFilteredDimensionSpec extends BaseFilteredDimensionSpec
|
|||
int count = 0;
|
||||
final Map<Integer,Integer> forwardMapping = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < selector.getValueCardinality(); i++) {
|
||||
final int selectorCardinality = selector.getValueCardinality();
|
||||
if (selectorCardinality < 0) {
|
||||
throw new UnsupportedOperationException("Cannot decorate a selector with no dictionary");
|
||||
}
|
||||
|
||||
for (int i = 0; i < selectorCardinality; i++) {
|
||||
if (compiledRegex.matcher(Strings.nullToEmpty(selector.lookupName(i))).matches()) {
|
||||
forwardMapping.put(i, count++);
|
||||
}
|
||||
|
|
|
@ -68,6 +68,8 @@ import java.util.NoSuchElementException;
|
|||
*/
|
||||
public class GroupByQueryEngine
|
||||
{
|
||||
private static final int MISSING_VALUE = -1;
|
||||
|
||||
private final Supplier<GroupByQueryConfig> config;
|
||||
private final StupidPool<ByteBuffer> intermediateResultsBufferPool;
|
||||
|
||||
|
@ -189,7 +191,7 @@ public class GroupByQueryEngine
|
|||
final IndexedInts row = dimSelector.getRow();
|
||||
if (row == null || row.size() == 0) {
|
||||
ByteBuffer newKey = key.duplicate();
|
||||
newKey.putInt(dimSelector.getValueCardinality());
|
||||
newKey.putInt(MISSING_VALUE);
|
||||
unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size()));
|
||||
} else {
|
||||
for (Integer dimValue : row) {
|
||||
|
@ -405,7 +407,7 @@ public class GroupByQueryEngine
|
|||
for (int i = 0; i < dimensions.size(); ++i) {
|
||||
final DimensionSelector dimSelector = dimensions.get(i);
|
||||
final int dimVal = keyBuffer.getInt();
|
||||
if (dimSelector.getValueCardinality() != dimVal) {
|
||||
if (MISSING_VALUE != dimVal) {
|
||||
theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,6 @@ import com.metamx.common.guava.Accumulator;
|
|||
import io.druid.data.input.MapBasedRow;
|
||||
import io.druid.data.input.Row;
|
||||
import io.druid.granularity.AllGranularity;
|
||||
import io.druid.granularity.QueryGranularity;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.dimension.DimensionSpec;
|
||||
import io.druid.query.extraction.ExtractionFn;
|
||||
|
@ -527,7 +526,7 @@ public class RowBasedGrouperHelper
|
|||
@Override
|
||||
public int getValueCardinality()
|
||||
{
|
||||
throw new UnsupportedOperationException("value cardinality is unknown");
|
||||
return DimensionSelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -61,7 +61,6 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
|
|||
return new TopNParams(
|
||||
dimSelector,
|
||||
cursor,
|
||||
dimSelector.getValueCardinality(),
|
||||
Integer.MAX_VALUE
|
||||
);
|
||||
}
|
||||
|
@ -129,7 +128,7 @@ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], T
|
|||
|
||||
private int[] getDimValSelectorForTopNMetric(TopNParams params, TopNResultBuilder resultBuilder)
|
||||
{
|
||||
int[] dimValSelector = new int[params.getDimSelector().getValueCardinality()];
|
||||
int[] dimValSelector = new int[params.getCardinality()];
|
||||
Arrays.fill(dimValSelector, SKIP_POSITION_VALUE);
|
||||
|
||||
Iterator<DimValHolder> dimValIter = resultBuilder.getTopNIterator();
|
||||
|
|
|
@ -196,6 +196,10 @@ public abstract class BaseTopNAlgorithm<DimValSelector, DimValAggregateStore, Pa
|
|||
ignoreAfterThreshold = false;
|
||||
ignoreFirstN = 0;
|
||||
keepOnlyN = dimSelector.getValueCardinality();
|
||||
|
||||
if (keepOnlyN < 0) {
|
||||
throw new UnsupportedOperationException("Cannot operate on a dimension with no dictionary");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -54,7 +54,6 @@ public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][]
|
|||
return new TopNParams(
|
||||
dimSelector,
|
||||
cursor,
|
||||
dimSelector.getValueCardinality(),
|
||||
Integer.MAX_VALUE
|
||||
);
|
||||
}
|
||||
|
|
|
@ -67,6 +67,10 @@ public class PooledTopNAlgorithm
|
|||
|
||||
final int cardinality = dimSelector.getValueCardinality();
|
||||
|
||||
if (cardinality < 0) {
|
||||
throw new UnsupportedOperationException("Cannot operate on a dimension with no dictionary");
|
||||
}
|
||||
|
||||
final TopNMetricSpecBuilder<int[]> arrayProvider = new BaseArrayProvider<int[]>(
|
||||
dimSelector,
|
||||
query,
|
||||
|
@ -102,7 +106,6 @@ public class PooledTopNAlgorithm
|
|||
return PooledTopNParams.builder()
|
||||
.withDimSelector(dimSelector)
|
||||
.withCursor(cursor)
|
||||
.withCardinality(cardinality)
|
||||
.withResultsBufHolder(resultsBufHolder)
|
||||
.withResultsBuf(resultsBuf)
|
||||
.withArrayProvider(arrayProvider)
|
||||
|
@ -517,7 +520,6 @@ public class PooledTopNAlgorithm
|
|||
public PooledTopNParams(
|
||||
DimensionSelector dimSelector,
|
||||
Cursor cursor,
|
||||
int cardinality,
|
||||
ResourceHolder<ByteBuffer> resultsBufHolder,
|
||||
ByteBuffer resultsBuf,
|
||||
int[] aggregatorSizes,
|
||||
|
@ -526,7 +528,7 @@ public class PooledTopNAlgorithm
|
|||
TopNMetricSpecBuilder<int[]> arrayProvider
|
||||
)
|
||||
{
|
||||
super(dimSelector, cursor, cardinality, numValuesPerPass);
|
||||
super(dimSelector, cursor, numValuesPerPass);
|
||||
|
||||
this.resultsBufHolder = resultsBufHolder;
|
||||
this.resultsBuf = resultsBuf;
|
||||
|
@ -569,7 +571,6 @@ public class PooledTopNAlgorithm
|
|||
{
|
||||
private DimensionSelector dimSelector;
|
||||
private Cursor cursor;
|
||||
private int cardinality;
|
||||
private ResourceHolder<ByteBuffer> resultsBufHolder;
|
||||
private ByteBuffer resultsBuf;
|
||||
private int[] aggregatorSizes;
|
||||
|
@ -581,7 +582,6 @@ public class PooledTopNAlgorithm
|
|||
{
|
||||
dimSelector = null;
|
||||
cursor = null;
|
||||
cardinality = 0;
|
||||
resultsBufHolder = null;
|
||||
resultsBuf = null;
|
||||
aggregatorSizes = null;
|
||||
|
@ -602,12 +602,6 @@ public class PooledTopNAlgorithm
|
|||
return this;
|
||||
}
|
||||
|
||||
public Builder withCardinality(int cardinality)
|
||||
{
|
||||
this.cardinality = cardinality;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withResultsBufHolder(ResourceHolder<ByteBuffer> resultsBufHolder)
|
||||
{
|
||||
this.resultsBufHolder = resultsBufHolder;
|
||||
|
@ -649,7 +643,6 @@ public class PooledTopNAlgorithm
|
|||
return new PooledTopNParams(
|
||||
dimSelector,
|
||||
cursor,
|
||||
cardinality,
|
||||
resultsBufHolder,
|
||||
resultsBuf,
|
||||
aggregatorSizes,
|
||||
|
|
|
@ -45,7 +45,6 @@ public class TimeExtractionTopNAlgorithm extends BaseTopNAlgorithm<int[], Map<St
|
|||
return new TopNParams(
|
||||
dimSelector,
|
||||
cursor,
|
||||
dimSelector.getValueCardinality(),
|
||||
Integer.MAX_VALUE
|
||||
);
|
||||
}
|
||||
|
|
|
@ -34,14 +34,17 @@ public class TopNParams
|
|||
protected TopNParams(
|
||||
DimensionSelector dimSelector,
|
||||
Cursor cursor,
|
||||
int cardinality,
|
||||
int numValuesPerPass
|
||||
)
|
||||
{
|
||||
this.dimSelector = dimSelector;
|
||||
this.cursor = cursor;
|
||||
this.cardinality = cardinality;
|
||||
this.cardinality = dimSelector.getValueCardinality();
|
||||
this.numValuesPerPass = numValuesPerPass;
|
||||
|
||||
if (cardinality < 0) {
|
||||
throw new UnsupportedOperationException("Cannot operate on a dimension without a dictionary");
|
||||
}
|
||||
}
|
||||
|
||||
public DimensionSelector getDimSelector()
|
||||
|
|
|
@ -23,6 +23,8 @@ package io.druid.segment;import io.druid.segment.data.IndexedInts;
|
|||
*/
|
||||
public interface DimensionSelector
|
||||
{
|
||||
public static int CARDINALITY_UNKNOWN = -1;
|
||||
|
||||
/**
|
||||
* Gets all values for the row inside of an IntBuffer. I.e. one possible implementation could be
|
||||
*
|
||||
|
@ -42,7 +44,12 @@ public interface DimensionSelector
|
|||
*
|
||||
* Value cardinality would be 2.
|
||||
*
|
||||
* @return the value cardinality
|
||||
* Cardinality may be unknown (e.g. the selector used by IncrementalIndex while reading input rows),
|
||||
* in which case this method will return -1. If cardinality is unknown, you should assume this
|
||||
* dimension selector has no dictionary, and avoid storing ids, calling "lookupId", or calling "lookupName"
|
||||
* outside of the context of operating on a single row.
|
||||
*
|
||||
* @return the value cardinality, or -1 if unknown.
|
||||
*/
|
||||
public int getValueCardinality();
|
||||
|
||||
|
|
|
@ -333,7 +333,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
|
|||
@Override
|
||||
public int getValueCardinality()
|
||||
{
|
||||
throw new UnsupportedOperationException("value cardinality is unknown in incremental index");
|
||||
return DimensionSelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -4589,19 +4589,11 @@ public class GroupByQueryRunnerTest
|
|||
.setGranularity(QueryRunnerTestHelper.allGran)
|
||||
.build();
|
||||
|
||||
// v2 strategy would throw an exception for this because the dimension selector in GroupByRowProcessor is not
|
||||
// dictionary encoded, but instead require the row to be set when doing lookup. Null Pointer Exception occurs
|
||||
// when the filter aggregator is initialized with no row set.
|
||||
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V2)) {
|
||||
expectedException.expect(NullPointerException.class);
|
||||
GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
} else {
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "rows", 837L)
|
||||
);
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "rows", 837L)
|
||||
);
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -48,7 +48,11 @@ import io.druid.query.aggregation.Aggregator;
|
|||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.CountAggregatorFactory;
|
||||
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
|
||||
import io.druid.query.aggregation.FilteredAggregatorFactory;
|
||||
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import io.druid.query.filter.BoundDimFilter;
|
||||
import io.druid.query.filter.SelectorDimFilter;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
import io.druid.query.timeseries.TimeseriesQuery;
|
||||
import io.druid.query.timeseries.TimeseriesQueryEngine;
|
||||
import io.druid.query.timeseries.TimeseriesQueryQueryToolChest;
|
||||
|
@ -289,6 +293,48 @@ public class IncrementalIndexTest
|
|||
Assert.assertEquals(Arrays.asList("4"), row.getDimension("dim2"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilteredAggregators() throws Exception
|
||||
{
|
||||
long timestamp = System.currentTimeMillis();
|
||||
IncrementalIndex index = closer.closeLater(
|
||||
indexCreator.createIndex(new AggregatorFactory[]{
|
||||
new CountAggregatorFactory("count"),
|
||||
new FilteredAggregatorFactory(
|
||||
new CountAggregatorFactory("count_selector_filtered"),
|
||||
new SelectorDimFilter("dim2", "2", null)
|
||||
),
|
||||
new FilteredAggregatorFactory(
|
||||
new CountAggregatorFactory("count_bound_filtered"),
|
||||
new BoundDimFilter("dim2", "2", "3", false, true, null, null, StringComparators.NUMERIC)
|
||||
)
|
||||
})
|
||||
);
|
||||
|
||||
populateIndex(timestamp, index);
|
||||
Assert.assertEquals(Arrays.asList("dim1", "dim2"), index.getDimensionNames());
|
||||
Assert.assertEquals(
|
||||
Arrays.asList("count", "count_selector_filtered", "count_bound_filtered"),
|
||||
index.getMetricNames()
|
||||
);
|
||||
Assert.assertEquals(2, index.size());
|
||||
|
||||
final Iterator<Row> rows = index.iterator();
|
||||
Row row = rows.next();
|
||||
Assert.assertEquals(timestamp, row.getTimestampFromEpoch());
|
||||
Assert.assertEquals(Arrays.asList("1"), row.getDimension("dim1"));
|
||||
Assert.assertEquals(Arrays.asList("2"), row.getDimension("dim2"));
|
||||
Assert.assertEquals(1L, row.getLongMetric("count"));
|
||||
Assert.assertEquals(1L, row.getLongMetric("count_selector_filtered"));
|
||||
Assert.assertEquals(1L, row.getLongMetric("count_bound_filtered"));
|
||||
|
||||
row = rows.next();
|
||||
Assert.assertEquals(timestamp, row.getTimestampFromEpoch());
|
||||
Assert.assertEquals(1L, row.getLongMetric("count"));
|
||||
Assert.assertEquals(0L, row.getLongMetric("count_selector_filtered"));
|
||||
Assert.assertEquals(0L, row.getLongMetric("count_bound_filtered"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleThreadedIndexingAndQuery() throws Exception
|
||||
{
|
||||
|
|
|
@ -143,10 +143,10 @@ public class IncrementalIndexStorageAdapterTest
|
|||
Assert.assertEquals(2, results.size());
|
||||
|
||||
MapBasedRow row = (MapBasedRow) results.get(0);
|
||||
Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
|
||||
Assert.assertEquals(ImmutableMap.of("sally", "bo", "cnt", 1L), row.getEvent());
|
||||
|
||||
row = (MapBasedRow) results.get(1);
|
||||
Assert.assertEquals(ImmutableMap.of("sally", "bo", "cnt", 1L), row.getEvent());
|
||||
Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue