fix filtering bug when filtering on unnest cols and dim cols: received a non-applicable rewrite (#14587)

Pranav 2023-08-16 17:57:16 -07:00 committed by GitHub
parent f585f0a8ed
commit 26d82fd342
4 changed files with 741 additions and 133 deletions

View File

@@ -38,6 +38,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.segment.filter.AndFilter;
import org.apache.druid.segment.filter.BoundFilter;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.filter.LikeFilter;
@@ -314,84 +315,29 @@ public class UnnestStorageAdapter implements StorageAdapter
filtersPushedDownToBaseCursor -> null (as the filter cannot be re-written due to presence of virtual columns)
filtersForPostUnnestCursor -> d12 IN (a,b) or m1 < 10
*/
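A rough sketch of the split described in the comment above, using Druid's OrFilter and SelectorFilter classes and Guava's ImmutableList; the filters, values, and column names are illustrative and not part of this patch:
// Query filter referencing the unnest output column ("d12") and a regular base column ("m1"),
// simplified here to selector filters.
final Filter exampleQueryFilter = new OrFilter(ImmutableList.of(
    new SelectorFilter("d12", "a"),
    new SelectorFilter("m1", "10")
));
// Because "d12" is produced by an expression over several columns (e.g. ARRAY[dim1, dim2]),
// its leg cannot be rewritten in terms of a single base column, so:
//   filtersPushedDownToBaseCursor -> null (nothing is pushed below the unnest)
//   filtersForPostUnnestCursor    -> (d12 = a || m1 = 10), evaluated after unnesting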
class FilterSplitter
{
final List<Filter> filtersPushedDownToBaseCursor = new ArrayList<>();
final List<Filter> filtersForPostUnnestCursor = new ArrayList<>();
void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters)
{
if (filter == null) {
return;
}
if (!skipPreFilters) {
final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites);
if (newFilter != null) {
// Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting
// any rows that do not match this filter at all.
filtersPushedDownToBaseCursor.add(newFilter);
}
}
// Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values.
filtersForPostUnnestCursor.add(filter);
}
void addPreFilter(@Nullable final Filter filter)
{
if (filter == null) {
return;
}
final Set<String> requiredColumns = filter.getRequiredColumns();
// Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call
// that perhaps forces the code to use a ValueMatcher where an index would've been available,
// which can have real performance implications. This is an interim choice made to value correctness
// over performance. When we need to optimize this performance, we should be able to
// create a VirtualColumnDatasource that contains all the virtual columns, in which case the query
// itself would stop carrying them and everything should be able to be pushed down.
if (queryVirtualColumns.getVirtualColumns().length > 0) {
for (String column : requiredColumns) {
if (queryVirtualColumns.exists(column)) {
filtersForPostUnnestCursor.add(filter);
return;
}
}
}
filtersPushedDownToBaseCursor.add(filter);
}
}
final FilterSplitter filterSplitter = new FilterSplitter();
final FilterSplitter filterSplitter = new FilterSplitter(inputColumn, inputColumnCapabilites, queryVirtualColumns);
if (queryFilter != null) {
List<Filter> preFilterList = new ArrayList<>();
final int origFilterSize;
if (queryFilter.getRequiredColumns().contains(outputColumnName)) {
// outside filter contains unnested column
// requires check for OR
if (queryFilter instanceof OrFilter) {
origFilterSize = ((OrFilter) queryFilter).getFilters().size();
for (Filter filter : ((OrFilter) queryFilter).getFilters()) {
if (filter.getRequiredColumns().contains(outputColumnName)) {
final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(
filter,
inputColumn,
inputColumnCapabilites
);
if (newFilter != null) {
preFilterList.add(newFilter);
}
} else {
preFilterList.add(filter);
// requires checks for both OR and AND filters; disqualifies the rewrite for non-unnest filters
if (queryFilter instanceof BooleanFilter) {
boolean isTopLevelAndFilter = queryFilter instanceof AndFilter;
List<Filter> preFilterList = recursiveRewriteOnUnnestFilters(
(BooleanFilter) queryFilter,
inputColumn,
inputColumnCapabilites,
filterSplitter,
isTopLevelAndFilter
);
// If the rewrite of the entire query filter succeeds, add the whole filter as a pre-filter; otherwise skip it and only add it to the post filter.
if (filterSplitter.getPreFilterCount() == filterSplitter.getOriginalFilterCount()) {
if (queryFilter instanceof AndFilter) {
filterSplitter.addPreFilter(new AndFilter(preFilterList));
} else if (queryFilter instanceof OrFilter) {
filterSplitter.addPreFilter(new OrFilter(preFilterList));
}
}
if (preFilterList.size() == origFilterSize) {
// there has been successful rewrites
final OrFilter preOrFilter = new OrFilter(preFilterList);
filterSplitter.addPreFilter(preOrFilter);
}
// add the entire query filter to the post-unnest filter, to be used in the value matcher
filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true);
} else {
@@ -412,7 +358,173 @@ public class UnnestStorageAdapter implements StorageAdapter
);
}
class FilterSplitter
{
private String inputColumn;
private ColumnCapabilities inputColumnCapabilites;
private VirtualColumns queryVirtualColumns;
private int originalFilterCount = 0;
private int preFilterCount = 0;
public FilterSplitter(
String inputColumn,
ColumnCapabilities inputColumnCapabilites,
VirtualColumns queryVirtualColumns
)
{
this.inputColumn = inputColumn;
this.inputColumnCapabilites = inputColumnCapabilites;
this.queryVirtualColumns = queryVirtualColumns;
}
final List<Filter> filtersPushedDownToBaseCursor = new ArrayList<>();
final List<Filter> filtersForPostUnnestCursor = new ArrayList<>();
void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters)
{
if (filter == null) {
return;
}
if (!skipPreFilters) {
final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites);
if (newFilter != null) {
// Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting
// any rows that do not match this filter at all.
filtersPushedDownToBaseCursor.add(newFilter);
}
}
// Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values.
filtersForPostUnnestCursor.add(filter);
}
void addPreFilter(@Nullable final Filter filter)
{
if (filter == null) {
return;
}
final Set<String> requiredColumns = filter.getRequiredColumns();
// Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call
// that perhaps forces the code to use a ValueMatcher where an index would've been available,
// which can have real performance implications. This is an interim choice made to value correctness
// over performance. When we need to optimize this performance, we should be able to
// create a VirtualColumnDatasource that contains all the virtual columns, in which case the query
// itself would stop carrying them and everything should be able to be pushed down.
if (queryVirtualColumns.getVirtualColumns().length > 0) {
for (String column : requiredColumns) {
if (queryVirtualColumns.exists(column)) {
filtersForPostUnnestCursor.add(filter);
return;
}
}
}
filtersPushedDownToBaseCursor.add(filter);
}
public void addToOriginalFilterCount(int c)
{
originalFilterCount += c;
}
public void addToPreFilterCount(int c)
{
preFilterCount += c;
}
public int getOriginalFilterCount()
{
return originalFilterCount;
}
public int getPreFilterCount()
{
return preFilterCount;
}
}
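A brief, hypothetical illustration of how addPreFilter above routes filters when the query declares a virtual column v0; the column names and values are illustrative, not from this patch:
final Filter onBaseColumn = new SelectorFilter("dim2", "x");   // "dim2" is a physical base column
final Filter onVirtualColumn = new SelectorFilter("v0", "3");  // "v0" is a query virtual column
// addPreFilter(onBaseColumn)    -> filtersPushedDownToBaseCursor: indexes can be used before unnesting
// addPreFilter(onVirtualColumn) -> filtersForPostUnnestCursor: "v0" only exists after the query's virtual
//                                  columns are evaluated, so it is checked with a ValueMatcher post-unnest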
/**
* Handles the nested rewrite for unnest columns in a recursive way.
* It loops through all AND/OR filters, rewrites only the required filters in the children, and adds them to the pre-filters
* if they qualify; otherwise it skips adding them to the pre-filters.
* RULES:
* 1. Add to the pre-filters only when the top-level filter is an AND.
* For example: a=1 and (b=2 or c=2). In this case a=1 can be added as a pre-filter, but b=2 cannot.
* 2. If the top level is an OR filter, we either add the entire top-level OR filter to the pre-filters or skip it altogether.
* For example: a=1 or (b=2 and c=2)
* 3. Filters on an unnest column that is derived from an array or any other expression cannot be pushed down to the base.
* For example: a=1 and vc=3, where vc is an ExpressionVirtualColumn; vc=3 cannot be pushed down to the base even if the top-level filter is an AND.
* A. Unnesting a single dimension, e.g. select * from foo, UNNEST(MV_TO_ARRAY(dim3)) as u(d3)
* B. Unnesting an expression over multiple columns, e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12)
* In case A, d3 is a direct reference to dim3, so any filter using d3 can be rewritten in terms of dim3 and added to the pre-filters,
* while in case B, due to the presence of the expression virtual column expressionVirtualColumn("j0.unnest", "array(\"dim1\",\"dim2\")", ColumnType.STRING_ARRAY),
* the filters on d12 cannot be pushed to the pre-filters.
*
* @param queryFilter query filter passed to makeCursors
* @param inputColumn input column to unnest if it's a direct access; otherwise null
* @param inputColumnCapabilites input column capabilities if known; otherwise null
*/
private List<Filter> recursiveRewriteOnUnnestFilters(
BooleanFilter queryFilter,
final String inputColumn,
final ColumnCapabilities inputColumnCapabilites,
final FilterSplitter filterSplitter,
final boolean isTopLevelAndFilter
)
{
final List<Filter> preFilterList = new ArrayList<>();
for (Filter filter : queryFilter.getFilters()) {
if (filter.getRequiredColumns().contains(outputColumnName)) {
if (filter instanceof AndFilter) {
preFilterList.add(new AndFilter(recursiveRewriteOnUnnestFilters(
(BooleanFilter) filter,
inputColumn,
inputColumnCapabilites,
filterSplitter,
isTopLevelAndFilter
)));
} else if (filter instanceof OrFilter) {
// in the case of OR filters, we set isTopLevelAndFilter to false, which prevents pushing down any child filters to the base
List<Filter> orChildFilters = recursiveRewriteOnUnnestFilters(
(BooleanFilter) filter,
inputColumn,
inputColumnCapabilites,
filterSplitter,
false
);
preFilterList.add(new OrFilter(orChildFilters));
} else {
final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(
filter,
inputColumn,
inputColumnCapabilites
);
if (newFilter != null) {
// this makes sure that we do not push unnest-column filters down to the base filter without rewriting them.
preFilterList.add(newFilter);
filterSplitter.addToPreFilterCount(1);
}
/*
Push the filters down to the base only if the top-level filter is an AND filter.
We cannot push down if the top-level filter is an OR, or if unnestColumn is a derived expression such as an array.
*/
if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) {
filterSplitter.addPreFilter(newFilter != null ? newFilter : filter);
}
filterSplitter.addToOriginalFilterCount(1);
}
} else {
preFilterList.add(filter);
// for filters on non-unnest columns, we still need to count any nested filters, since we do not traverse them in this method
int filterCount = Filters.countNumberOfFilters(filter);
filterSplitter.addToOriginalFilterCount(filterCount);
filterSplitter.addToPreFilterCount(filterCount);
}
}
return preFilterList;
}
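A hedged worked example of RULES 1 and 2 above, consistent with the new unit tests in this patch; it assumes the unnest output d3 is a direct reference to dim3, and the filter values are illustrative:
// RULE 1: top level is AND, and every leaf can be rewritten or counted, so the rewritten filter is pushed down.
//   query filter: d3 = '3' AND (newcol = '2' OR dim3 = '2' OR d3 = '1')
//   pre-filter:   dim3 = '3' AND (newcol = '2' OR dim3 = '2' OR dim3 = '1')   (d3 rewritten to dim3)
//   post-filter:  d3 = '3' AND (newcol = '2' OR dim3 = '2' OR d3 = '1')       (original, re-checked after unnest)
// RULE 2: top level is OR, so the push-down is all-or-nothing: it happens only when
//   getPreFilterCount() == getOriginalFilterCount(). If d3 were instead derived from an expression
//   such as ARRAY[dim1, dim2], the rewrite of its leg would return null and nothing would be pushed down.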
/**
* Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null.
*/
@@ -465,7 +577,6 @@ public class UnnestStorageAdapter implements StorageAdapter
return false;
}
}
return true;
} else if (filter instanceof NotFilter) {
return filterMapsOverMultiValueStrings(((NotFilter) filter).getBaseFilter());

View File

@@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.filter.BooleanFilter;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.DruidPredicateFactory;
@@ -422,4 +423,19 @@ public class Filters
return retVal;
}
public static int countNumberOfFilters(@Nullable Filter filter)
{
if (filter == null) {
return 0;
}
if (filter instanceof BooleanFilter) {
return ((BooleanFilter) filter).getFilters()
.stream()
.map(f -> countNumberOfFilters(f))
.mapToInt(Integer::intValue)
.sum();
}
return 1;
}
}
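A quick illustration of the new Filters.countNumberOfFilters helper; the filters below are hypothetical and only show how leaves are counted:
// Leaf filters count as 1; AND/OR (BooleanFilter) wrappers are not counted themselves, only their children, recursively.
final Filter nested = new AndFilter(ImmutableList.of(
    new SelectorFilter("a", "1"),
    new OrFilter(ImmutableList.of(
        new SelectorFilter("b", "2"),
        new SelectorFilter("c", "3")
    ))
));
Filters.countNumberOfFilters(nested);                        // -> 3
Filters.countNumberOfFilters(new SelectorFilter("a", "1"));  // -> 1
Filters.countNumberOfFilters(null);                          // -> 0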

View File

@@ -20,8 +20,8 @@
package org.apache.druid.segment;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.Sequence;
@@ -29,26 +29,13 @@ import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.LikeDimFilter;
import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.RangeFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.filter.AndFilter;
import org.apache.druid.segment.filter.BoundFilter;
import org.apache.druid.segment.filter.ColumnComparisonFilter;
import org.apache.druid.segment.filter.FalseFilter;
import org.apache.druid.segment.filter.LikeFilter;
import org.apache.druid.segment.filter.NotFilter;
import org.apache.druid.segment.filter.OrFilter;
import org.apache.druid.segment.filter.SelectorFilter;
import org.apache.druid.segment.filter.TrueFilter;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
@@ -72,6 +59,10 @@ import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.List;
import static org.apache.druid.segment.filter.FilterTestUtils.selector;
import static org.apache.druid.segment.filter.Filters.and;
import static org.apache.druid.segment.filter.Filters.or;
public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
{
private static Closer CLOSER;
@@ -286,13 +277,13 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final OrFilter baseFilter = new OrFilter(ImmutableList.of(
new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(),
new SelectorDimFilter(inputColumn, "2", null).toFilter()
selector(OUTPUT_COLUMN_NAME, "1"),
selector(inputColumn, "2")
));
final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of(
new SelectorDimFilter(inputColumn, "1", null).toFilter(),
new SelectorDimFilter(inputColumn, "2", null).toFilter()
selector(inputColumn, "1"),
selector(inputColumn, "2")
));
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
@@ -316,14 +307,182 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
return null;
});
}
@Test
public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters()
{
final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
new TestStorageAdapter(INCREMENTAL_INDEX),
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
null
);
final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final OrFilter baseFilter = new OrFilter(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "1"),
new AndFilter(ImmutableList.of(
selector(inputColumn, "2"),
selector(OUTPUT_COLUMN_NAME, "10")
))
));
final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of(
selector(inputColumn, "1"),
new AndFilter(ImmutableList.of(
selector(inputColumn, "2"),
selector(inputColumn, "10")
))
));
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
baseFilter,
unnestStorageAdapter.getInterval(),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);
final TestStorageAdapter base = (TestStorageAdapter) unnestStorageAdapter.getBaseAdapter();
final Filter pushDownFilter = base.getPushDownFilter();
Assert.assertEquals(expectedPushDownFilter, pushDownFilter);
cursorSequence.accumulate(null, (accumulated, cursor) -> {
Assert.assertEquals(cursor.getClass(), PostJoinCursor.class);
final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter();
// OR-case so base filter should match the postJoinFilter
Assert.assertEquals(baseFilter, postFilter);
return null;
});
}
@Test
public void test_nested_filters_unnested_and_topLevel1And3filtersInOR()
{
final Filter testQueryFilter = and(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
or(ImmutableList.of(
selector("newcol", "2"),
selector(COLUMNNAME, "2"),
selector(OUTPUT_COLUMN_NAME, "1")
))
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || multi-string1 = 1))",
"(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))"
);
}
@Test
public void test_nested_multiLevel_filters_unnested()
{
final Filter testQueryFilter = and(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
or(ImmutableList.of(
or(ImmutableList.of(
selector("newcol", "2"),
selector(COLUMNNAME, "2"),
and(ImmutableList.of(
selector("newcol", "3"),
selector(COLUMNNAME, "7")
))
)),
selector(OUTPUT_COLUMN_NAME, "1")
))
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || multi-string1 = 1))",
"(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))"
);
}
@Test
public void test_nested_multiLevel_filters_unnested5Level()
{
final Filter testQueryFilter = or(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
or(ImmutableList.of(
or(ImmutableList.of(
selector("newcol", "2"),
selector(COLUMNNAME, "2"),
and(ImmutableList.of(
selector("newcol", "3"),
and(ImmutableList.of(
selector(COLUMNNAME, "7"),
selector("newcol_1", "10")
))
))
)),
selector(OUTPUT_COLUMN_NAME, "1")
))
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || multi-string1 = 1)",
"(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 = 1)"
);
}
@Test
public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR()
{
final Filter testQueryFilter = or(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
and(ImmutableList.of(
selector("newcol", "2"),
selector(COLUMNNAME, "2"),
selector(OUTPUT_COLUMN_NAME, "1")
))
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && multi-string1 = 1))",
"(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))"
);
}
@Test
public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs()
{
final Filter testQueryFilter = and(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
or(ImmutableList.of(
selector("newcol", "2"),
selector(COLUMNNAME, "2")
)),
or(ImmutableList.of(
selector("newcol", "4"),
selector(COLUMNNAME, "8"),
selector(OUTPUT_COLUMN_NAME, "6")
))
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || multi-string1 = 6))",
"(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))"
);
}
@Test
public void test_nested_filters_unnested_and_topLevelAND2sdf()
{
final Filter testQueryFilter = and(ImmutableList.of(
selector(OUTPUT_COLUMN_NAME, "3"),
selector(COLUMNNAME, "2")
));
testComputeBaseAndPostUnnestFilters(
testQueryFilter,
"(multi-string1 = 3 && multi-string1 = 2)",
"(unnested-multi-string1 = 3 && multi-string1 = 2)"
);
}
@Test
public void test_pushdown_filters_unnested_dimension_with_unnest_adapters()
{
final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
new TestStorageAdapter(INCREMENTAL_INDEX),
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null)
new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null)
);
final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
@@ -331,7 +490,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final Filter expectedPushDownFilter =
new SelectorDimFilter(inputColumn, "1", null).toFilter();
selector(inputColumn, "1");
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
@@ -377,10 +536,9 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
final Filter expectedPushDownFilter =
new SelectorDimFilter(inputColumn, "1", null).toFilter();
selector(inputColumn, "1");
final Filter queryFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter();
final Filter queryFilter = new SelectorFilter(OUTPUT_COLUMN_NAME, "1", null);
final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
queryFilter,
unnestStorageAdapter.getInterval(),
@@ -409,45 +567,32 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
});
}
@Test
public void testAllowedFiltersForPushdown()
public void testComputeBaseAndPostUnnestFilters(
Filter testQueryFilter,
String expectedBasePushDown,
String expectedPostUnnest
)
{
Filter[] allowed = new Filter[] {
new SelectorFilter("column", "value"),
new InDimFilter("column", ImmutableSet.of("a", "b")),
new LikeFilter("column", null, LikeDimFilter.LikeMatcher.from("hello%", null), null),
new BoundFilter(
new BoundDimFilter("column", "a", "b", true, true, null, null, null)
),
NullFilter.forColumn("column"),
new EqualityFilter("column", ColumnType.LONG, 1234L, null),
new RangeFilter("column", ColumnType.LONG, 0L, 1234L, true, false, null)
};
// not exhaustive
Filter[] notAllowed = new Filter[] {
TrueFilter.instance(),
FalseFilter.instance(),
new ColumnComparisonFilter(ImmutableList.of(DefaultDimensionSpec.of("col1"), DefaultDimensionSpec.of("col2")))
};
for (Filter f : allowed) {
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f));
}
for (Filter f : notAllowed) {
Assert.assertFalse(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f));
}
Filter notAnd = new NotFilter(
new AndFilter(
Arrays.asList(allowed)
)
final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(UNNEST_STORAGE_ADAPTER.getUnnestColumn());
final VirtualColumn vc = UNNEST_STORAGE_ADAPTER.getUnnestColumn();
Pair<Filter, Filter> filterPair = UNNEST_STORAGE_ADAPTER.computeBaseAndPostUnnestFilters(
testQueryFilter,
null,
VirtualColumns.EMPTY,
inputColumn,
vc.capabilities(UNNEST_STORAGE_ADAPTER, inputColumn)
);
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(notAnd));
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new OrFilter(Arrays.asList(allowed))));
Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(notAnd)));
Assert.assertFalse(
UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(new OrFilter(Arrays.asList(notAllowed))))
Filter actualPushDownFilter = filterPair.lhs;
Filter actualPostUnnestFilter = filterPair.rhs;
Assert.assertEquals(
"Expects only top level child of And Filter to push down to base",
expectedBasePushDown,
actualPushDownFilter == null ? "" : actualPushDownFilter.toString()
);
Assert.assertEquals(
"Should have post unnest filter",
expectedPostUnnest,
actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString()
);
}
}

View File

@@ -2880,6 +2880,253 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testUnnestThriceWithFiltersOnDimAndUnnestCol()
{
cannotVectorize();
String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n"
+ " ( SELECT * FROM \n"
+ " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n"
+ " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) "
+ " WHERE dimZipf=27 AND dim3_unnest1='Baz'";
List<Query<?>> expectedQuerySc = ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
UnnestDataSource.create(
UnnestDataSource.create(
FilteredDataSource.create(
UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE5),
expressionVirtualColumn(
"j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
null
),
and(
NullHandling.sqlCompatible()
? equality("dimZipf", "27", ColumnType.LONG)
: bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC),
equality("j0.unnest", "Baz", ColumnType.STRING)
)
),
expressionVirtualColumn(
"_j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
), null
),
expressionVirtualColumn(
"__j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
null
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.context(QUERY_CONTEXT_UNNEST)
.virtualColumns(expressionVirtualColumn(
"v0",
"'Baz'",
ColumnType.STRING
))
.columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0"))
.build()
);
testQuery(
sql,
QUERY_CONTEXT_UNNEST,
expectedQuerySc,
ImmutableList.of(
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Hello"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Hello"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Hello", "Baz"},
new Object[]{"27", "Baz", "Hello", "Baz"},
new Object[]{"27", "Baz", "Hello", "Hello"},
new Object[]{"27", "Baz", "Hello", "World"},
new Object[]{"27", "Baz", "World", "Baz"},
new Object[]{"27", "Baz", "World", "Baz"},
new Object[]{"27", "Baz", "World", "Hello"},
new Object[]{"27", "Baz", "World", "World"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Hello"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Baz"},
new Object[]{"27", "Baz", "Baz", "Hello"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Hello", "Baz"},
new Object[]{"27", "Baz", "Hello", "Baz"},
new Object[]{"27", "Baz", "Hello", "Hello"},
new Object[]{"27", "Baz", "Hello", "World"},
new Object[]{"27", "Baz", "World", "Baz"},
new Object[]{"27", "Baz", "World", "Baz"},
new Object[]{"27", "Baz", "World", "Hello"},
new Object[]{"27", "Baz", "World", "World"}
)
);
}
@Test
public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns()
{
cannotVectorize();
String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n"
+ " ( SELECT * FROM \n"
+ " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n"
+ " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) "
+ " WHERE dimZipf=27 AND dim3_unnest1='Baz' AND dim3_unnest2='Hello' AND dim3_unnest3='World'";
List<Query<?>> expectedQuerySc = ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
UnnestDataSource.create(
UnnestDataSource.create(
FilteredDataSource.create(
UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE5),
expressionVirtualColumn(
"j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
null
),
and(
NullHandling.sqlCompatible()
? equality("dimZipf", "27", ColumnType.LONG)
: bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC),
equality("j0.unnest", "Baz", ColumnType.STRING)
)
),
expressionVirtualColumn(
"_j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
), equality("_j0.unnest", "Hello", ColumnType.STRING)
),
expressionVirtualColumn(
"__j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
equality("__j0.unnest", "World", ColumnType.STRING)
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.virtualColumns(expressionVirtualColumn(
"v0",
"'Baz'",
ColumnType.STRING
))
.legacy(false)
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0"))
.build()
);
testQuery(
sql,
QUERY_CONTEXT_UNNEST,
expectedQuerySc,
ImmutableList.of(
new Object[]{"27", "Baz", "Hello", "World"},
new Object[]{"27", "Baz", "Hello", "World"}
)
);
}
@Test
public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations()
{
cannotVectorize();
skipVectorize();
String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n"
+ " ( SELECT * FROM \n"
+ " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )"
+ " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n"
+ " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) "
+ " WHERE dimZipf=27 AND (dim3_unnest1='Baz' OR dim3_unnest2='Hello') AND dim3_unnest3='World'";
List<Query<?>> expectedQuerySqlCom = ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
UnnestDataSource.create(
FilteredDataSource.create(
UnnestDataSource.create(
FilteredDataSource.create(
UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE5),
expressionVirtualColumn(
"j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
null
),
NullHandling.sqlCompatible() ? equality("dimZipf", "27", ColumnType.LONG) : range(
"dimZipf",
ColumnType.LONG,
"27",
"27",
false,
false
)
),
expressionVirtualColumn(
"_j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
null
),
or(
equality("j0.unnest", "Baz", ColumnType.STRING),
equality("_j0.unnest", "Hello", ColumnType.STRING)
) // (j0.unnest = Baz || _j0.unnest = Hello)
),
expressionVirtualColumn(
"__j0.unnest",
"\"dimMultivalEnumerated\"",
ColumnType.STRING
),
equality("__j0.unnest", "World", ColumnType.STRING)
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest"))
.build()
);
testQuery(
sql,
QUERY_CONTEXT_UNNEST, expectedQuerySqlCom,
ImmutableList.of(
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Hello", "World"},
new Object[]{"27", "Baz", "World", "World"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Baz", "World"},
new Object[]{"27", "Baz", "Hello", "World"},
new Object[]{"27", "Baz", "World", "World"},
new Object[]{"27", "Hello", "Hello", "World"},
new Object[]{"27", "World", "Hello", "World"}
)
);
}
@Test
public void testUnnestWithGroupBy()
{
@@ -3148,6 +3395,95 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testUnnestVirtualWithColumns1()
{
// This tells the test to skip generating the (vectorize = force) path.
// Generates only 1 native query, with vectorize = false.
skipVectorize();
// This tells the test that both vectorize = force and vectorize = false take the same non-vectorized path.
// Generates 2 native queries with 2 different values of vectorize.
cannotVectorize();
testQuery(
"SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' and (m1<=10 or strings='b'))",
QUERY_CONTEXT_UNNEST,
ImmutableList.of(Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn(
"j0.unnest",
"\"dim3\"",
ColumnType.STRING
),
equality("j0.unnest", "a", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.filters(or(
NullHandling.sqlCompatible()
? range("m1", ColumnType.LONG, null, "10", false, false)
: bound(
"m1",
null,
"10",
false,
false,
null,
StringComparators.NUMERIC
),
equality("j0.unnest", "b", ColumnType.STRING)
))
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("j0.unnest", "m1"))
.build()),
ImmutableList.of(new Object[]{"a", 1.0f})
);
}
@Test
public void testUnnestVirtualWithColumns2()
{
// This tells the test to skip generating the (vectorize = force) path.
// Generates only 1 native query, with vectorize = false.
skipVectorize();
// This tells the test that both vectorize = force and vectorize = false take the same non-vectorized path.
// Generates 2 native queries with 2 different values of vectorize.
cannotVectorize();
testQuery(
"SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' or (m1=2 and strings='b'))",
QUERY_CONTEXT_UNNEST,
ImmutableList.of(Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn(
"j0.unnest",
"\"dim3\"",
ColumnType.STRING
),
null
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false) // (j0.unnest = a || (m1 = 2 && j0.unnest = b))
.filters(or(
equality("j0.unnest", "a", ColumnType.STRING),
and(
NullHandling.sqlCompatible()
? equality("m1", "2", ColumnType.FLOAT)
: equality("m1", "2", ColumnType.STRING),
equality("j0.unnest", "b", ColumnType.STRING)
)
))
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("j0.unnest", "m1"))
.build()),
ImmutableList.of(
new Object[]{"a", 1.0f},
new Object[]{"b", 2.0f}
)
);
}
@Test
public void testUnnestWithFilters()
{