mirror of https://github.com/apache/druid.git
ExpressionFilter: Use index for expressions of single multi-value columns. (#10320)
Previously, this was disallowed, because expressions treated multi-values as nulls. But now, if there's a single multi-value column that can be mapped over, it's okay to use the index. Expression selectors already do this.
This commit is contained in:
parent
707b5aae2b
commit
f53785c52c
|
@ -45,13 +45,13 @@ import java.util.Set;
|
|||
public class ExpressionFilter implements Filter
|
||||
{
|
||||
private final Supplier<Expr> expr;
|
||||
private final Supplier<Set<String>> requiredBindings;
|
||||
private final Supplier<Expr.BindingDetails> bindingDetails;
|
||||
private final FilterTuning filterTuning;
|
||||
|
||||
public ExpressionFilter(final Supplier<Expr> expr, final FilterTuning filterTuning)
|
||||
{
|
||||
this.expr = expr;
|
||||
this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getRequiredBindings());
|
||||
this.bindingDetails = Suppliers.memoize(() -> expr.get().analyzeInputs());
|
||||
this.filterTuning = filterTuning;
|
||||
}
|
||||
|
||||
|
@ -107,15 +107,17 @@ public class ExpressionFilter implements Filter
|
|||
@Override
|
||||
public boolean supportsBitmapIndex(final BitmapIndexSelector selector)
|
||||
{
|
||||
if (requiredBindings.get().isEmpty()) {
|
||||
final Expr.BindingDetails details = this.bindingDetails.get();
|
||||
|
||||
if (details.getRequiredBindings().isEmpty()) {
|
||||
// Constant expression.
|
||||
return true;
|
||||
} else if (requiredBindings.get().size() == 1) {
|
||||
// Single-column expression. We can use bitmap indexes if this column has an index and does not have
|
||||
// multiple values. The lack of multiple values is important because expression filters treat multi-value
|
||||
// arrays as nulls, which doesn't permit index based filtering.
|
||||
final String column = Iterables.getOnlyElement(requiredBindings.get());
|
||||
return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
|
||||
} else if (details.getRequiredBindings().size() == 1) {
|
||||
// Single-column expression. We can use bitmap indexes if this column has an index and the expression can
|
||||
// map over the values of the index.
|
||||
final String column = Iterables.getOnlyElement(details.getRequiredBindings());
|
||||
return selector.getBitmapIndex(column) != null
|
||||
&& ExpressionSelectors.canMapOverDictionary(details, selector.hasMultipleValues(column));
|
||||
} else {
|
||||
// Multi-column expression.
|
||||
return false;
|
||||
|
@ -131,7 +133,7 @@ public class ExpressionFilter implements Filter
|
|||
@Override
|
||||
public <T> T getBitmapResult(final BitmapIndexSelector selector, final BitmapResultFactory<T> bitmapResultFactory)
|
||||
{
|
||||
if (requiredBindings.get().isEmpty()) {
|
||||
if (bindingDetails.get().getRequiredBindings().isEmpty()) {
|
||||
// Constant expression.
|
||||
if (expr.get().eval(ExprUtils.nilBindings()).asBoolean()) {
|
||||
return bitmapResultFactory.wrapAllTrue(Filters.allTrue(selector));
|
||||
|
@ -139,9 +141,11 @@ public class ExpressionFilter implements Filter
|
|||
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
|
||||
}
|
||||
} else {
|
||||
// Can assume there's only one binding and it has a bitmap index, otherwise supportsBitmapIndex would have
|
||||
// returned false and the caller should not have called us.
|
||||
final String column = Iterables.getOnlyElement(requiredBindings.get());
|
||||
// Can assume there's only one binding, it has a bitmap index, and it's a single input mapping.
|
||||
// Otherwise, supportsBitmapIndex would have returned false and the caller should not have called us.
|
||||
assert supportsBitmapIndex(selector);
|
||||
|
||||
final String column = Iterables.getOnlyElement(bindingDetails.get().getRequiredBindings());
|
||||
return Filters.matchPredicate(
|
||||
column,
|
||||
selector,
|
||||
|
@ -175,7 +179,7 @@ public class ExpressionFilter implements Filter
|
|||
@Override
|
||||
public Set<String> getRequiredColumns()
|
||||
{
|
||||
return requiredBindings.get();
|
||||
return bindingDetails.get().getRequiredBindings();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -210,7 +214,6 @@ public class ExpressionFilter implements Filter
|
|||
{
|
||||
return "ExpressionFilter{" +
|
||||
"expr=" + expr +
|
||||
", requiredBindings=" + requiredBindings +
|
||||
", filterTuning=" + filterTuning +
|
||||
'}';
|
||||
}
|
||||
|
|
|
@ -226,9 +226,7 @@ public class ExpressionSelectors
|
|||
if (capabilities != null
|
||||
&& capabilities.getType() == ValueType.STRING
|
||||
&& capabilities.isDictionaryEncoded().isTrue()
|
||||
&& !capabilities.hasMultipleValues().isUnknown()
|
||||
&& !exprDetails.hasInputArrays()
|
||||
&& !exprDetails.isOutputArray()
|
||||
&& canMapOverDictionary(exprDetails, capabilities.hasMultipleValues())
|
||||
) {
|
||||
return new SingleStringInputDimensionSelector(
|
||||
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),
|
||||
|
@ -339,6 +337,25 @@ public class ExpressionSelectors
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether an expression can be applied to unique values of a particular column (like those in a dictionary)
|
||||
* rather than being applied to each row individually.
|
||||
*
|
||||
* This function should only be called if you have already determined that an expression is over a single column,
|
||||
* and that single column has a dictionary.
|
||||
*
|
||||
* @param exprDetails result of calling {@link Expr#analyzeInputs()} on an expression
|
||||
* @param hasMultipleValues result of calling {@link ColumnCapabilities#hasMultipleValues()}
|
||||
*/
|
||||
public static boolean canMapOverDictionary(
|
||||
final Expr.BindingDetails exprDetails,
|
||||
final ColumnCapabilities.Capable hasMultipleValues
|
||||
)
|
||||
{
|
||||
Preconditions.checkState(exprDetails.getRequiredBindings().size() == 1, "requiredBindings.size == 1");
|
||||
return !hasMultipleValues.isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingDetails} which
|
||||
* provides the set of identifiers which need a binding (list of required columns), and context of whether or not they
|
||||
|
|
|
@ -143,6 +143,7 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||
assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
|
||||
}
|
||||
assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
|
||||
assertFilterMatchesSkipVectorize(edf("array_contains(dim3, '1')"), ImmutableList.of("3", "4", "6"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -158,6 +159,16 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||
assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), ImmutableList.of("0"));
|
||||
assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), ImmutableList.of("3"));
|
||||
assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4", "5"));
|
||||
assertFilterMatchesSkipVectorize(edf("concat(dim4, dim4) == '33'"), ImmutableList.of("3"));
|
||||
assertFilterMatchesSkipVectorize(edf("like(dim4, '4%')"), ImmutableList.of("4", "5"));
|
||||
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, '5')"), ImmutableList.of("4", "5"));
|
||||
assertFilterMatchesSkipVectorize(edf("array_to_string(dim4, ':') == '4:5'"), ImmutableList.of("4", "5"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleAndMultiValuedStringColumn()
|
||||
{
|
||||
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, dim3)"), ImmutableList.of("5", "9"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -284,7 +295,7 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||
public void testEqualsContract()
|
||||
{
|
||||
EqualsVerifier.forClass(ExpressionFilter.class)
|
||||
.withIgnoredFields("requiredBindings")
|
||||
.withIgnoredFields("bindingDetails")
|
||||
.usingGetClass()
|
||||
.verify();
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.druid.segment.Cursor;
|
|||
import org.apache.druid.segment.DimensionSelector;
|
||||
import org.apache.druid.segment.TestObjectColumnSelector;
|
||||
import org.apache.druid.segment.VirtualColumns;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
||||
|
@ -58,7 +59,95 @@ import java.util.List;
|
|||
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
||||
{
|
||||
@Test
|
||||
public void testSupplierFromDimensionSelector()
|
||||
public void test_canMapOverDictionary_oneSingleValueInput()
|
||||
{
|
||||
Assert.assertTrue(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.FALSE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneSingleValueInputSpecifiedTwice()
|
||||
{
|
||||
Assert.assertTrue(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("concat(dim1, dim1) == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.FALSE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneMultiValueInput()
|
||||
{
|
||||
Assert.assertTrue(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.TRUE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneUnknownInput()
|
||||
{
|
||||
Assert.assertFalse(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.UNKNOWN
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneSingleValueInputInArrayContext()
|
||||
{
|
||||
Assert.assertFalse(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.FALSE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneMultiValueInputInArrayContext()
|
||||
{
|
||||
Assert.assertFalse(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.TRUE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary_oneUnknownInputInArrayContext()
|
||||
{
|
||||
Assert.assertFalse(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.UNKNOWN
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_canMapOverDictionary()
|
||||
{
|
||||
Assert.assertTrue(
|
||||
ExpressionSelectors.canMapOverDictionary(
|
||||
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||
ColumnCapabilities.Capable.FALSE
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_supplierFromDimensionSelector()
|
||||
{
|
||||
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromDimensionSelector(
|
||||
|
@ -77,7 +166,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSupplierFromObjectSelectorObject()
|
||||
public void test_supplierFromObjectSelector_onObject()
|
||||
{
|
||||
final SettableSupplier<Object> settableSupplier = new SettableSupplier<>();
|
||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||
|
@ -101,7 +190,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSupplierFromObjectSelectorNumber()
|
||||
public void test_supplierFromObjectSelector_onNumber()
|
||||
{
|
||||
final SettableSupplier<Number> settableSupplier = new SettableSupplier<>();
|
||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||
|
@ -120,7 +209,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSupplierFromObjectSelectorString()
|
||||
public void test_supplierFromObjectSelector_onString()
|
||||
{
|
||||
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||
|
@ -138,7 +227,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSupplierFromObjectSelectorList()
|
||||
public void test_supplierFromObjectSelector_onList()
|
||||
{
|
||||
final SettableSupplier<List> settableSupplier = new SettableSupplier<>();
|
||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||
|
@ -154,7 +243,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testCoerceListToArray()
|
||||
public void test_coerceListToArray()
|
||||
{
|
||||
List<Long> longList = ImmutableList.of(1L, 2L, 3L);
|
||||
Assert.assertArrayEquals(new Long[]{1L, 2L, 3L}, (Long[]) ExpressionSelectors.coerceListToArray(longList));
|
||||
|
@ -225,7 +314,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testCoerceExprToValue()
|
||||
public void test_coerceEvalToSelectorObject()
|
||||
{
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(1L, 2L, 3L),
|
||||
|
@ -253,7 +342,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIncrementIndexStringSelector() throws IndexSizeExceededException
|
||||
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException
|
||||
{
|
||||
// This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of
|
||||
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
|
||||
|
|
Loading…
Reference in New Issue