mirror of
https://github.com/apache/druid.git
synced 2025-02-28 22:39:19 +00:00
ExpressionFilter: Use index for expressions of single multi-value columns. (#10320)
Previously, using the bitmap index for an expression over a multi-value column was disallowed, because expressions treated multi-values as nulls. Now, if the expression reads a single multi-value column that can be mapped over, it's okay to use the index. Expression selectors already do this.
This commit is contained in:
parent
707b5aae2b
commit
f53785c52c
@ -45,13 +45,13 @@ import java.util.Set;
|
|||||||
public class ExpressionFilter implements Filter
|
public class ExpressionFilter implements Filter
|
||||||
{
|
{
|
||||||
private final Supplier<Expr> expr;
|
private final Supplier<Expr> expr;
|
||||||
private final Supplier<Set<String>> requiredBindings;
|
private final Supplier<Expr.BindingDetails> bindingDetails;
|
||||||
private final FilterTuning filterTuning;
|
private final FilterTuning filterTuning;
|
||||||
|
|
||||||
public ExpressionFilter(final Supplier<Expr> expr, final FilterTuning filterTuning)
|
public ExpressionFilter(final Supplier<Expr> expr, final FilterTuning filterTuning)
|
||||||
{
|
{
|
||||||
this.expr = expr;
|
this.expr = expr;
|
||||||
this.requiredBindings = Suppliers.memoize(() -> expr.get().analyzeInputs().getRequiredBindings());
|
this.bindingDetails = Suppliers.memoize(() -> expr.get().analyzeInputs());
|
||||||
this.filterTuning = filterTuning;
|
this.filterTuning = filterTuning;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,15 +107,17 @@ public class ExpressionFilter implements Filter
|
|||||||
@Override
|
@Override
|
||||||
public boolean supportsBitmapIndex(final BitmapIndexSelector selector)
|
public boolean supportsBitmapIndex(final BitmapIndexSelector selector)
|
||||||
{
|
{
|
||||||
if (requiredBindings.get().isEmpty()) {
|
final Expr.BindingDetails details = this.bindingDetails.get();
|
||||||
|
|
||||||
|
if (details.getRequiredBindings().isEmpty()) {
|
||||||
// Constant expression.
|
// Constant expression.
|
||||||
return true;
|
return true;
|
||||||
} else if (requiredBindings.get().size() == 1) {
|
} else if (details.getRequiredBindings().size() == 1) {
|
||||||
// Single-column expression. We can use bitmap indexes if this column has an index and does not have
|
// Single-column expression. We can use bitmap indexes if this column has an index and the expression can
|
||||||
// multiple values. The lack of multiple values is important because expression filters treat multi-value
|
// map over the values of the index.
|
||||||
// arrays as nulls, which doesn't permit index based filtering.
|
final String column = Iterables.getOnlyElement(details.getRequiredBindings());
|
||||||
final String column = Iterables.getOnlyElement(requiredBindings.get());
|
return selector.getBitmapIndex(column) != null
|
||||||
return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
|
&& ExpressionSelectors.canMapOverDictionary(details, selector.hasMultipleValues(column));
|
||||||
} else {
|
} else {
|
||||||
// Multi-column expression.
|
// Multi-column expression.
|
||||||
return false;
|
return false;
|
||||||
@ -131,7 +133,7 @@ public class ExpressionFilter implements Filter
|
|||||||
@Override
|
@Override
|
||||||
public <T> T getBitmapResult(final BitmapIndexSelector selector, final BitmapResultFactory<T> bitmapResultFactory)
|
public <T> T getBitmapResult(final BitmapIndexSelector selector, final BitmapResultFactory<T> bitmapResultFactory)
|
||||||
{
|
{
|
||||||
if (requiredBindings.get().isEmpty()) {
|
if (bindingDetails.get().getRequiredBindings().isEmpty()) {
|
||||||
// Constant expression.
|
// Constant expression.
|
||||||
if (expr.get().eval(ExprUtils.nilBindings()).asBoolean()) {
|
if (expr.get().eval(ExprUtils.nilBindings()).asBoolean()) {
|
||||||
return bitmapResultFactory.wrapAllTrue(Filters.allTrue(selector));
|
return bitmapResultFactory.wrapAllTrue(Filters.allTrue(selector));
|
||||||
@ -139,9 +141,11 @@ public class ExpressionFilter implements Filter
|
|||||||
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
|
return bitmapResultFactory.wrapAllFalse(Filters.allFalse(selector));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Can assume there's only one binding and it has a bitmap index, otherwise supportsBitmapIndex would have
|
// Can assume there's only one binding, it has a bitmap index, and it's a single input mapping.
|
||||||
// returned false and the caller should not have called us.
|
// Otherwise, supportsBitmapIndex would have returned false and the caller should not have called us.
|
||||||
final String column = Iterables.getOnlyElement(requiredBindings.get());
|
assert supportsBitmapIndex(selector);
|
||||||
|
|
||||||
|
final String column = Iterables.getOnlyElement(bindingDetails.get().getRequiredBindings());
|
||||||
return Filters.matchPredicate(
|
return Filters.matchPredicate(
|
||||||
column,
|
column,
|
||||||
selector,
|
selector,
|
||||||
@ -175,7 +179,7 @@ public class ExpressionFilter implements Filter
|
|||||||
@Override
|
@Override
|
||||||
public Set<String> getRequiredColumns()
|
public Set<String> getRequiredColumns()
|
||||||
{
|
{
|
||||||
return requiredBindings.get();
|
return bindingDetails.get().getRequiredBindings();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -210,7 +214,6 @@ public class ExpressionFilter implements Filter
|
|||||||
{
|
{
|
||||||
return "ExpressionFilter{" +
|
return "ExpressionFilter{" +
|
||||||
"expr=" + expr +
|
"expr=" + expr +
|
||||||
", requiredBindings=" + requiredBindings +
|
|
||||||
", filterTuning=" + filterTuning +
|
", filterTuning=" + filterTuning +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
@ -226,9 +226,7 @@ public class ExpressionSelectors
|
|||||||
if (capabilities != null
|
if (capabilities != null
|
||||||
&& capabilities.getType() == ValueType.STRING
|
&& capabilities.getType() == ValueType.STRING
|
||||||
&& capabilities.isDictionaryEncoded().isTrue()
|
&& capabilities.isDictionaryEncoded().isTrue()
|
||||||
&& !capabilities.hasMultipleValues().isUnknown()
|
&& canMapOverDictionary(exprDetails, capabilities.hasMultipleValues())
|
||||||
&& !exprDetails.hasInputArrays()
|
|
||||||
&& !exprDetails.isOutputArray()
|
|
||||||
) {
|
) {
|
||||||
return new SingleStringInputDimensionSelector(
|
return new SingleStringInputDimensionSelector(
|
||||||
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),
|
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ValueType.STRING)),
|
||||||
@ -339,6 +337,25 @@ public class ExpressionSelectors
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether an expression can be applied to unique values of a particular column (like those in a dictionary)
|
||||||
|
* rather than being applied to each row individually.
|
||||||
|
*
|
||||||
|
* This function should only be called if you have already determined that an expression is over a single column,
|
||||||
|
* and that single column has a dictionary.
|
||||||
|
*
|
||||||
|
* @param exprDetails result of calling {@link Expr#analyzeInputs()} on an expression
|
||||||
|
* @param hasMultipleValues result of calling {@link ColumnCapabilities#hasMultipleValues()}
|
||||||
|
*/
|
||||||
|
public static boolean canMapOverDictionary(
|
||||||
|
final Expr.BindingDetails exprDetails,
|
||||||
|
final ColumnCapabilities.Capable hasMultipleValues
|
||||||
|
)
|
||||||
|
{
|
||||||
|
Preconditions.checkState(exprDetails.getRequiredBindings().size() == 1, "requiredBindings.size == 1");
|
||||||
|
return !hasMultipleValues.isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingDetails} which
|
* Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link Expr.BindingDetails} which
|
||||||
* provides the set of identifiers which need a binding (list of required columns), and context of whether or not they
|
* provides the set of identifiers which need a binding (list of required columns), and context of whether or not they
|
||||||
|
@ -143,6 +143,7 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||||||
assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
|
assertFilterMatchesSkipVectorize(edf("dim3 < 2.0"), ImmutableList.of("3", "4", "6", "9"));
|
||||||
}
|
}
|
||||||
assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
|
assertFilterMatchesSkipVectorize(edf("like(dim3, '1%')"), ImmutableList.of("1", "3", "4", "6", "9"));
|
||||||
|
assertFilterMatchesSkipVectorize(edf("array_contains(dim3, '1')"), ImmutableList.of("3", "4", "6"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -158,6 +159,16 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||||||
assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), ImmutableList.of("0"));
|
assertFilterMatchesSkipVectorize(edf("dim4 == '1'"), ImmutableList.of("0"));
|
||||||
assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), ImmutableList.of("3"));
|
assertFilterMatchesSkipVectorize(edf("dim4 == '3'"), ImmutableList.of("3"));
|
||||||
assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4", "5"));
|
assertFilterMatchesSkipVectorize(edf("dim4 == '4'"), ImmutableList.of("4", "5"));
|
||||||
|
assertFilterMatchesSkipVectorize(edf("concat(dim4, dim4) == '33'"), ImmutableList.of("3"));
|
||||||
|
assertFilterMatchesSkipVectorize(edf("like(dim4, '4%')"), ImmutableList.of("4", "5"));
|
||||||
|
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, '5')"), ImmutableList.of("4", "5"));
|
||||||
|
assertFilterMatchesSkipVectorize(edf("array_to_string(dim4, ':') == '4:5'"), ImmutableList.of("4", "5"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSingleAndMultiValuedStringColumn()
|
||||||
|
{
|
||||||
|
assertFilterMatchesSkipVectorize(edf("array_contains(dim4, dim3)"), ImmutableList.of("5", "9"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -284,7 +295,7 @@ public class ExpressionFilterTest extends BaseFilterTest
|
|||||||
public void testEqualsContract()
|
public void testEqualsContract()
|
||||||
{
|
{
|
||||||
EqualsVerifier.forClass(ExpressionFilter.class)
|
EqualsVerifier.forClass(ExpressionFilter.class)
|
||||||
.withIgnoredFields("requiredBindings")
|
.withIgnoredFields("bindingDetails")
|
||||||
.usingGetClass()
|
.usingGetClass()
|
||||||
.verify();
|
.verify();
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,7 @@ import org.apache.druid.segment.Cursor;
|
|||||||
import org.apache.druid.segment.DimensionSelector;
|
import org.apache.druid.segment.DimensionSelector;
|
||||||
import org.apache.druid.segment.TestObjectColumnSelector;
|
import org.apache.druid.segment.TestObjectColumnSelector;
|
||||||
import org.apache.druid.segment.VirtualColumns;
|
import org.apache.druid.segment.VirtualColumns;
|
||||||
|
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||||
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
||||||
@ -58,7 +59,95 @@ import java.util.List;
|
|||||||
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
||||||
{
|
{
|
||||||
@Test
|
@Test
|
||||||
public void testSupplierFromDimensionSelector()
|
public void test_canMapOverDictionary_oneSingleValueInput()
|
||||||
|
{
|
||||||
|
Assert.assertTrue(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.FALSE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneSingleValueInputSpecifiedTwice()
|
||||||
|
{
|
||||||
|
Assert.assertTrue(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("concat(dim1, dim1) == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.FALSE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneMultiValueInput()
|
||||||
|
{
|
||||||
|
Assert.assertTrue(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.TRUE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneUnknownInput()
|
||||||
|
{
|
||||||
|
Assert.assertFalse(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.UNKNOWN
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneSingleValueInputInArrayContext()
|
||||||
|
{
|
||||||
|
Assert.assertFalse(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.FALSE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneMultiValueInputInArrayContext()
|
||||||
|
{
|
||||||
|
Assert.assertFalse(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.TRUE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary_oneUnknownInputInArrayContext()
|
||||||
|
{
|
||||||
|
Assert.assertFalse(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("array_contains(dim1, 2)", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.UNKNOWN
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_canMapOverDictionary()
|
||||||
|
{
|
||||||
|
Assert.assertTrue(
|
||||||
|
ExpressionSelectors.canMapOverDictionary(
|
||||||
|
Parser.parse("dim1 == 2", ExprMacroTable.nil()).analyzeInputs(),
|
||||||
|
ColumnCapabilities.Capable.FALSE
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_supplierFromDimensionSelector()
|
||||||
{
|
{
|
||||||
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
||||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromDimensionSelector(
|
final Supplier<Object> supplier = ExpressionSelectors.supplierFromDimensionSelector(
|
||||||
@ -77,7 +166,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSupplierFromObjectSelectorObject()
|
public void test_supplierFromObjectSelector_onObject()
|
||||||
{
|
{
|
||||||
final SettableSupplier<Object> settableSupplier = new SettableSupplier<>();
|
final SettableSupplier<Object> settableSupplier = new SettableSupplier<>();
|
||||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||||
@ -101,7 +190,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSupplierFromObjectSelectorNumber()
|
public void test_supplierFromObjectSelector_onNumber()
|
||||||
{
|
{
|
||||||
final SettableSupplier<Number> settableSupplier = new SettableSupplier<>();
|
final SettableSupplier<Number> settableSupplier = new SettableSupplier<>();
|
||||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||||
@ -120,7 +209,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSupplierFromObjectSelectorString()
|
public void test_supplierFromObjectSelector_onString()
|
||||||
{
|
{
|
||||||
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
final SettableSupplier<String> settableSupplier = new SettableSupplier<>();
|
||||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||||
@ -138,7 +227,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSupplierFromObjectSelectorList()
|
public void test_supplierFromObjectSelector_onList()
|
||||||
{
|
{
|
||||||
final SettableSupplier<List> settableSupplier = new SettableSupplier<>();
|
final SettableSupplier<List> settableSupplier = new SettableSupplier<>();
|
||||||
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
final Supplier<Object> supplier = ExpressionSelectors.supplierFromObjectSelector(
|
||||||
@ -154,7 +243,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCoerceListToArray()
|
public void test_coerceListToArray()
|
||||||
{
|
{
|
||||||
List<Long> longList = ImmutableList.of(1L, 2L, 3L);
|
List<Long> longList = ImmutableList.of(1L, 2L, 3L);
|
||||||
Assert.assertArrayEquals(new Long[]{1L, 2L, 3L}, (Long[]) ExpressionSelectors.coerceListToArray(longList));
|
Assert.assertArrayEquals(new Long[]{1L, 2L, 3L}, (Long[]) ExpressionSelectors.coerceListToArray(longList));
|
||||||
@ -225,7 +314,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCoerceExprToValue()
|
public void test_coerceEvalToSelectorObject()
|
||||||
{
|
{
|
||||||
Assert.assertEquals(
|
Assert.assertEquals(
|
||||||
ImmutableList.of(1L, 2L, 3L),
|
ImmutableList.of(1L, 2L, 3L),
|
||||||
@ -253,7 +342,7 @@ public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIncrementIndexStringSelector() throws IndexSizeExceededException
|
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException
|
||||||
{
|
{
|
||||||
// This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
|
// This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
|
||||||
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
|
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
|
||||||
|
Loading…
x
Reference in New Issue
Block a user