From a6776648112917b72c077ba3ac0cb7f61993a2d0 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 6 Mar 2020 12:53:32 -0800 Subject: [PATCH] allow optimization of single multi-value column input expr with repeated identifier (#9425) * allow optimization of single multi-value column input expr with repeated identifier * add test --- .../segment/virtual/ExpressionSelectors.java | 8 +- .../SingleStringInputDimensionSelector.java | 5 - .../druid/query/MultiValuedDimensionTest.java | 5 - .../virtual/ExpressionVirtualColumnTest.java | 128 ++++++++++++++++++ 4 files changed, 129 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 56dd6d00c64..345274aa6e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -223,19 +223,13 @@ public class ExpressionSelectors // Optimization for dimension selectors that wrap a single underlying string column. // The string column can be multi-valued, but if so, it must be implicitly mappable (i.e. the expression is - // not treating it as an array, not wanting to output an array, and the multi-value dimension appears - // exactly once). + // not treating it as an array and not wanting to output an array if (capabilities != null && capabilities.getType() == ValueType.STRING && capabilities.isDictionaryEncoded() && capabilities.isComplete() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() - // the following condition specifically is to handle the case of when a multi-value column identifier - // appears more than once in an expression, - // e.g. 'x + x' is fine if 'x' is scalar, but if 'x' is multi-value it should be translated to - // 'cartesian_map((x_1, x_2) -> x_1 + x_2, x, x)' - && (!capabilities.hasMultipleValues() || exprDetails.getFreeVariables().size() == 1) ) { return new SingleStringInputDimensionSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)), diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java index 00c9d58001b..0a6cee87afd 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleStringInputDimensionSelector.java @@ -48,11 +48,6 @@ public class SingleStringInputDimensionSelector implements DimensionSelector final Expr expression ) { - // Verify expression has just one binding. - if (expression.analyzeInputs().getRequiredBindings().size() != 1) { - throw new ISE("WTF?! Expected expression with just one binding"); - } - // Verify selector has a working dictionary. if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN || !selector.nameLookupPossibleInAdvance()) { diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index da4a25809b7..d67e234d494 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -576,10 +576,6 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest @Test public void testGroupByExpressionMultiMultiAutoAutoDupeIdentifier() { - if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { - expectedException.expect(RuntimeException.class); - expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); - } GroupByQuery query = GroupByQuery .builder() .setDataSource("xx") @@ -615,7 +611,6 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t7t7", "count", 2L) ); - System.out.println(result.toList()); TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto-self"); } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 82a9d8020e3..c871e2e3ddb 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.virtual; +import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -35,18 +36,24 @@ import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.BucketExtractionFn; import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseFloatColumnValueSelector; import org.apache.druid.segment.BaseLongColumnValueSelector; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.RowBasedColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; +import javax.annotation.Nullable; import java.util.Arrays; public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest @@ -175,6 +182,20 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest TestExprMacroTable.INSTANCE ); + private static final ExpressionVirtualColumn SCALE_LIST_SELF_IMPLICIT = new ExpressionVirtualColumn( + "expr", + "b * b", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ); + + private static final ExpressionVirtualColumn SCALE_LIST_SELF_EXPLICIT = new ExpressionVirtualColumn( + "expr", + "map(b -> b * b, b)", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ); + private static final ThreadLocal CURRENT_ROW = new ThreadLocal<>(); private static final ColumnSelectorFactory COLUMN_SELECTOR_FACTORY = RowBasedColumnSelectorFactory.create( CURRENT_ROW::get, @@ -226,6 +247,113 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest Assert.assertEquals(Arrays.asList("6.0", NullHandling.replaceWithDefault() ? "0.0" : null, "10.0"), selectorExplicit.getObject()); } + @Test + public void testMultiObjectSelectorMakesRightSelector() + { + DimensionSpec spec = new DefaultDimensionSpec("expr", "expr"); + + // do some ugly faking to test if SingleStringInputDimensionSelector is created for multi-value expressions when possible + ColumnSelectorFactory factory = new ColumnSelectorFactory() + { + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + { + DimensionSelector delegate = COLUMN_SELECTOR_FACTORY.makeDimensionSelector(dimensionSpec); + DimensionSelector faker = new DimensionSelector() + { + @Override + public IndexedInts getRow() + { + return delegate.getRow(); + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return delegate.makeValueMatcher(value); + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return delegate.makeValueMatcher(predicate); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + delegate.inspectRuntimeShape(inspector); + } + + @Nullable + @Override + public Object getObject() + { + return delegate.getObject(); + } + + @Override + public Class classOfObject() + { + return delegate.classOfObject(); + } + + @Override + public int getValueCardinality() + { + // value doesn't matter as long as not CARDINALITY_UNKNOWN + return 3; + } + + @Nullable + @Override + public String lookupName(int id) + { + return null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + // fake this so when SingleStringInputDimensionSelector it doesn't explode + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return name -> 0; + } + }; + return faker; + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) + { + return COLUMN_SELECTOR_FACTORY.makeColumnValueSelector(columnName); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return new ColumnCapabilitiesImpl().setType(ValueType.STRING) + .setHasMultipleValues(true) + .setDictionaryEncoded(true) + .setIsComplete(true); + } + }; + final BaseObjectColumnValueSelector selectorImplicit = + SCALE_LIST_SELF_IMPLICIT.makeDimensionSelector(spec, factory); + final BaseObjectColumnValueSelector selectorExplicit = + SCALE_LIST_SELF_EXPLICIT.makeDimensionSelector(spec, factory); + + Assert.assertTrue(selectorImplicit instanceof SingleStringInputDimensionSelector); + Assert.assertTrue(selectorExplicit instanceof MultiValueExpressionDimensionSelector); + } + @Test public void testLongSelector() {