ExpressionFilter: Use index for expressions of single multi-value columns. (#10320)

Previously, this was disallowed, because expressions treated multi-values
as nulls. But now, if there's a single multi-value column that can be
mapped over, it's okay to use the index. Expression selectors already do
this.
This commit is contained in:
Gian Merlino 2020-08-24 23:29:31 -07:00 committed by GitHub
parent 707b5aae2b
commit f53785c52c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 147 additions and 27 deletions

View File

@ -45,13 +45,13 @@ import java.util.Set;
public class ExpressionFilter implements Filter
{
private final Supplier<Expr> expr;
private final Supplier<Set<String>> requiredBindings;
private final Supplier<Expr.BindingDetails> bindingDetails;
private final FilterTuning filterTuning;
public ExpressionFilter(final Supplier<Expr> expr, final FilterTuning filterTuning)
{
this.expr = expr;
this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getRequiredBindings());
this.bindingDetails = Suppliers.memoize(() -> expr.get().analyzeInputs());
this.filterTuning = filterTuning;
}
@ -107,15 +107,17 @@ public class ExpressionFilter implements Filter
@Override
public boolean supportsBitmapIndex(final BitmapIndexSelector selector)
{
if (requiredBindings.get().isEmpty()) {
final Expr.BindingDetails details = this.bindingDetails.get();
if (details.getRequiredBindings().isEmpty()) {
// Constant expression.
return true;
} else if (requiredBindings.get().size() == 1) {
// Single-column expression. We can use bitmap indexes if this column has an index and does not have
// multiple values. The lack of multiple values is important because expression filters treat multi-value
// arrays as nulls, which doesn't permit index based filtering.
final String column = Iterables.getOnlyElement(requiredBindings.get());
return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
} else if (details.getRequiredBindings().size() == 1) {
// Single-column expression. We can use bitmap indexes if this column has an index and the expression can
// map over the values of the index.
final String column = Iterables.getOnlyElement(details.getRequiredBindings());
return selector.getBitmapIndex(column) != null
&& ExpressionSelectors.canMapOverDictionary(details, selector.hasMultipleValues(column));
} else {
// Multi-column expression.
return false;
@ -131,7 +133,7 @@ public class ExpressionFilter implements Filter
@Override
public <T> T getBitmapResult(final BitmapIndexSelector selector, final BitmapResultFactory<T> bitmapResultFactory)
{
if (requiredBindings.get().isEmpty()) {
if (bindingDetails.get().getRequiredBindings().isEmpty()) {
// Constant expression.
if (expr.get().eval(ExprUtils.nilBindings()).asBoolean()) {
return bitmapResultFactory.wrapAllTrue(Filters.allTrue(selector));
@ -139,9 +141,11 @@ public class ExpressionFilter implements Filter
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
}
} else {
// Can assume there's only one binding and it has a bitmap index, otherwise supportsBitmapIndex would have
// returned false and the caller should not have called us.
final String column = Iterables.getOnlyElement(requiredBindings.get());
// Can assume there's only one binding, it has a bitmap index, and it's a single input mapping.
// Otherwise, supportsBitmapIndex would have returned false and the caller should not have called us.
assert supportsBitmapIndex(selector);
final String column = Iterables.getOnlyElement(bindingDetails.get().getRequiredBindings());
return Filters.matchPredicate(
column,
selector,
@ -175,7 +179,7 @@ public class ExpressionFilter implements Filter
@Override
public Set<String> getRequiredColumns()
{
return requiredBindings.get();
return bindingDetails.get().getRequiredBindings();
}
@Override
@ -210,7 +214,6 @@ public class ExpressionFilter implements Filter
{
return "ExpressionFilter{" +
"expr=" + expr +
", requiredBindings=" + requiredBindings +
", filterTuning=" + filterTuning +
'}';
}

View File

@ -226,9 +226,7 @@ public class ExpressionSelectors
if (capabilities != null
&& capabilities.getType() == ValueType.STRING
&& capabilities.isDictionaryEncoded().isTrue()
&& !capabilities.hasMultipleValues().isUnknown()
&& !exprDetails.hasInputArrays()
&& !exprDetails.isOutputArray()
&& canMapOverDictionary(exprDetails, capabilities.hasMultipleValues())
) {
return new SingleStringInputDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),
@ -339,6 +337,25 @@ public class ExpressionSelectors
}
}
/**
* Returns whether an expression can be applied to unique values of a particular column (like those in a dictionary)
* rather than being applied to each row individually.
*
* This function should only be called if you have already determined that an expression is over a single column,
* and that single column has a dictionary.
*
* @param exprDetails result of calling {@link Expr#analyzeInputs()} on an expression
* @param hasMultipleValues result of calling {@link ColumnCapabilities#hasMultipleValues()}
*/
public static boolean canMapOverDictionary(
final Expr.BindingDetails exprDetails,
final ColumnCapabilities.Capable hasMultipleValues
)
{
Preconditions.checkState(exprDetails.getRequiredBindings().size() == 1, "requiredBindings.size == 1");
return !hasMultipleValues.isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray();
}
/**
* Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingDetails} which
* provides the set of identifiers which need a binding (list of required columns), and context of whether or not they

View File

@ -143,6 +143,7 @@ public class ExpressionFilterTest extends BaseFilterTest
assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
}
assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
assertFilterMatchesSkipVectorize(edf("array_contains(dim3, '1')"), ImmutableList.of("3", "4", "6"));
}
@Test
@ -158,6 +159,16 @@ public class ExpressionFilterTest extends BaseFilterTest
assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), ImmutableList.of("0"));
assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), ImmutableList.of("3"));
assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4", "5"));
assertFilterMatchesSkipVectorize(edf("concat(dim4, dim4) == '33'"), ImmutableList.of("3"));
assertFilterMatchesSkipVectorize(edf("like(dim4, '4%')"), ImmutableList.of("4", "5"));
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, '5')"), ImmutableList.of("4", "5"));
assertFilterMatchesSkipVectorize(edf("array_to_string(dim4, ':') == '4:5'"), ImmutableList.of("4", "5"));
}
@Test
public void testSingleAndMultiValuedStringColumn()
{
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, dim3)"), ImmutableList.of("5", "9"));
}
@Test
@ -284,7 +295,7 @@ public class ExpressionFilterTest extends BaseFilterTest
public void testEqualsContract()
{
EqualsVerifier.forClass(ExpressionFilter.class)
.withIgnoredFields("requiredBindings")
.withIgnoredFields("bindingDetails")
.usingGetClass()
.verify();
}

View File

@ -43,6 +43,7 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.TestObjectColumnSelector;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
@ -58,7 +59,95 @@ import java.util.List;
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
{
@Test
public void testSupplierFromDimensionSelector()
public void test_canMapOverDictionary_oneSingleValueInput()
{
Assert.assertTrue(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.FALSE
)
);
}
@Test
public void test_canMapOverDictionary_oneSingleValueInputSpecifiedTwice()
{
Assert.assertTrue(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("concat(dim1, dim1) == 2", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.FALSE
)
);
}
@Test
public void test_canMapOverDictionary_oneMultiValueInput()
{
Assert.assertTrue(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.TRUE
)
);
}
@Test
public void test_canMapOverDictionary_oneUnknownInput()
{
Assert.assertFalse(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.UNKNOWN
)
);
}
@Test
public void test_canMapOverDictionary_oneSingleValueInputInArrayContext()
{
Assert.assertFalse(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.FALSE
)
);
}
@Test
public void test_canMapOverDictionary_oneMultiValueInputInArrayContext()
{
Assert.assertFalse(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.TRUE
)
);
}
@Test
public void test_canMapOverDictionary_oneUnknownInputInArrayContext()
{
Assert.assertFalse(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.UNKNOWN
)
);
}
@Test
public void test_canMapOverDictionary()
{
Assert.assertTrue(
ExpressionSelectors.canMapOverDictionary(
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
ColumnCapabilities.Capable.FALSE
)
);
}
@Test
public void test_supplierFromDimensionSelector()
{
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
final Supplier<Object> supplier = ExpressionSelectors.supplierFromDimensionSelector(
@ -77,7 +166,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testSupplierFromObjectSelectorObject()
public void test_supplierFromObjectSelector_onObject()
{
final SettableSupplier<Object> settableSupplier = new SettableSupplier<>();
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
@ -101,7 +190,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testSupplierFromObjectSelectorNumber()
public void test_supplierFromObjectSelector_onNumber()
{
final SettableSupplier<Number> settableSupplier = new SettableSupplier<>();
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
@ -120,7 +209,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testSupplierFromObjectSelectorString()
public void test_supplierFromObjectSelector_onString()
{
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
@ -138,7 +227,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testSupplierFromObjectSelectorList()
public void test_supplierFromObjectSelector_onList()
{
final SettableSupplier<List> settableSupplier = new SettableSupplier<>();
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
@ -154,7 +243,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testCoerceListToArray()
public void test_coerceListToArray()
{
List<Long> longList = ImmutableList.of(1L, 2L, 3L);
Assert.assertArrayEquals(new Long[]{1L, 2L, 3L}, (Long[]) ExpressionSelectors.coerceListToArray(longList));
@ -225,7 +314,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testCoerceExprToValue()
public void test_coerceEvalToSelectorObject()
{
Assert.assertEquals(
ImmutableList.of(1L, 2L, 3L),
@ -253,7 +342,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
}
@Test
public void testIncrementIndexStringSelector() throws IndexSizeExceededException
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException
{
// This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an