allow optimization of single multi-value column input expr with repeated identifier (#9425)

* allow optimization of single multi-value column input expr with repeated identifier

* add test
This commit is contained in:
Clint Wylie 2020-03-06 12:53:32 -08:00 committed by GitHub
parent eda03630d0
commit a677664811
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 17 deletions

View File

@ -223,19 +223,13 @@ public class ExpressionSelectors
// Optimization for dimension selectors that wrap a single underlying string column.
// The string column can be multi-valued, but if so, it must be implicitly mappable (i.e. the expression is
// not treating it as an array, not wanting to output an array, and the multi-value dimension appears
// exactly once).
// not treating it as an array and not wanting to output an array
if (capabilities != null
&& capabilities.getType() == ValueType.STRING
&& capabilities.isDictionaryEncoded()
&& capabilities.isComplete()
&& !exprDetails.hasInputArrays()
&& !exprDetails.isOutputArray()
// the following condition specifically is to handle the case of when a multi-value column identifier
// appears more than once in an expression,
// e.g. 'x + x' is fine if 'x' is scalar, but if 'x' is multi-value it should be translated to
// 'cartesian_map((x_1, x_2) -> x_1 + x_2, x, x)'
&& (!capabilities.hasMultipleValues() || exprDetails.getFreeVariables().size() == 1)
) {
return new SingleStringInputDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),

View File

@ -48,11 +48,6 @@ public class SingleStringInputDimensionSelector implements DimensionSelector
final Expr expression
)
{
// Verify expression has just one binding.
if (expression.analyzeInputs().getRequiredBindings().size() != 1) {
throw new ISE("WTF?! Expected expression with just one binding");
}
// Verify selector has a working dictionary.
if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN
|| !selector.nameLookupPossibleInAdvance()) {

View File

@ -576,10 +576,6 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest
@Test
public void testGroupByExpressionMultiMultiAutoAutoDupeIdentifier()
{
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
}
GroupByQuery query = GroupByQuery
.builder()
.setDataSource("xx")
@ -615,7 +611,6 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t7t7", "count", 2L)
);
System.out.println(result.toList());
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi-auto-auto-self");
}

View File

@ -19,6 +19,7 @@
package org.apache.druid.segment.virtual;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@ -35,18 +36,24 @@ import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.BucketExtractionFn;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseFloatColumnValueSelector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.RowBasedColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import javax.annotation.Nullable;
import java.util.Arrays;
public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
@ -175,6 +182,20 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
TestExprMacroTable.INSTANCE
);
private static final ExpressionVirtualColumn SCALE_LIST_SELF_IMPLICIT = new ExpressionVirtualColumn(
"expr",
"b * b",
ValueType.STRING,
TestExprMacroTable.INSTANCE
);
private static final ExpressionVirtualColumn SCALE_LIST_SELF_EXPLICIT = new ExpressionVirtualColumn(
"expr",
"map(b -> b * b, b)",
ValueType.STRING,
TestExprMacroTable.INSTANCE
);
private static final ThreadLocal<Row> CURRENT_ROW = new ThreadLocal<>();
private static final ColumnSelectorFactory COLUMN_SELECTOR_FACTORY = RowBasedColumnSelectorFactory.create(
CURRENT_ROW::get,
@ -226,6 +247,113 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
Assert.assertEquals(Arrays.asList("6.0", NullHandling.replaceWithDefault() ? "0.0" : null, "10.0"), selectorExplicit.getObject());
}
@Test
public void testMultiObjectSelectorMakesRightSelector()
{
DimensionSpec spec = new DefaultDimensionSpec("expr", "expr");
// do some ugly faking to test if SingleStringInputDimensionSelector is created for multi-value expressions when possible
ColumnSelectorFactory factory = new ColumnSelectorFactory()
{
@Override
public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
{
DimensionSelector delegate = COLUMN_SELECTOR_FACTORY.makeDimensionSelector(dimensionSpec);
DimensionSelector faker = new DimensionSelector()
{
@Override
public IndexedInts getRow()
{
return delegate.getRow();
}
@Override
public ValueMatcher makeValueMatcher(@Nullable String value)
{
return delegate.makeValueMatcher(value);
}
@Override
public ValueMatcher makeValueMatcher(Predicate<String> predicate)
{
return delegate.makeValueMatcher(predicate);
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
delegate.inspectRuntimeShape(inspector);
}
@Nullable
@Override
public Object getObject()
{
return delegate.getObject();
}
@Override
public Class<?> classOfObject()
{
return delegate.classOfObject();
}
@Override
public int getValueCardinality()
{
// value doesn't matter as long as not CARDINALITY_UNKNOWN
return 3;
}
@Nullable
@Override
public String lookupName(int id)
{
return null;
}
@Override
public boolean nameLookupPossibleInAdvance()
{
// fake this so when SingleStringInputDimensionSelector it doesn't explode
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return name -> 0;
}
};
return faker;
}
@Override
public ColumnValueSelector makeColumnValueSelector(String columnName)
{
return COLUMN_SELECTOR_FACTORY.makeColumnValueSelector(columnName);
}
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
return new ColumnCapabilitiesImpl().setType(ValueType.STRING)
.setHasMultipleValues(true)
.setDictionaryEncoded(true)
.setIsComplete(true);
}
};
final BaseObjectColumnValueSelector selectorImplicit =
SCALE_LIST_SELF_IMPLICIT.makeDimensionSelector(spec, factory);
final BaseObjectColumnValueSelector selectorExplicit =
SCALE_LIST_SELF_EXPLICIT.makeDimensionSelector(spec, factory);
Assert.assertTrue(selectorImplicit instanceof SingleStringInputDimensionSelector);
Assert.assertTrue(selectorExplicit instanceof MultiValueExpressionDimensionSelector);
}
@Test
public void testLongSelector()
{