From 26d82fd342de4ea40bc189542738023722e592af Mon Sep 17 00:00:00 2001 From: Pranav Date: Wed, 16 Aug 2023 17:57:16 -0700 Subject: [PATCH] fix filtering bug in filtering unnest cols and dim cols: Received a non-applicable rewrite (#14587) --- .../druid/segment/UnnestStorageAdapter.java | 257 ++++++++++---- .../apache/druid/segment/filter/Filters.java | 16 + .../segment/UnnestStorageAdapterTest.java | 265 ++++++++++---- .../sql/calcite/CalciteArraysQueryTest.java | 336 ++++++++++++++++++ 4 files changed, 741 insertions(+), 133 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 048cd10f8ef..71a1809a885 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -38,6 +38,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; +import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.LikeFilter; @@ -314,84 +315,29 @@ public class UnnestStorageAdapter implements StorageAdapter filtersPushedDownToBaseCursor -> null (as the filter cannot be re-written due to presence of virtual columns) filtersForPostUnnestCursor -> d12 IN (a,b) or m1 < 10 */ - class FilterSplitter - { - final List filtersPushedDownToBaseCursor = new ArrayList<>(); - final List filtersForPostUnnestCursor = new ArrayList<>(); - - void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) - { - if (filter == null) { - return; - } - if (!skipPreFilters) { - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); - if (newFilter != null) { - // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting - // any rows that do not match this filter at all. - filtersPushedDownToBaseCursor.add(newFilter); - } - } - // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. - filtersForPostUnnestCursor.add(filter); - } - - void addPreFilter(@Nullable final Filter filter) - { - if (filter == null) { - return; - } - - final Set requiredColumns = filter.getRequiredColumns(); - - // Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call - // that perhaps forces the code to use a ValueMatcher where an index would've been available, - // which can have real performance implications. This is an interim choice made to value correctness - // over performance. When we need to optimize this performance, we should be able to - // create a VirtualColumnDatasource that contains all the virtual columns, in which case the query - // itself would stop carrying them and everything should be able to be pushed down. - if (queryVirtualColumns.getVirtualColumns().length > 0) { - for (String column : requiredColumns) { - if (queryVirtualColumns.exists(column)) { - filtersForPostUnnestCursor.add(filter); - return; - } - } - } - filtersPushedDownToBaseCursor.add(filter); - - } - } - - final FilterSplitter filterSplitter = new FilterSplitter(); + final FilterSplitter filterSplitter = new FilterSplitter(inputColumn, inputColumnCapabilites, queryVirtualColumns); if (queryFilter != null) { - List preFilterList = new ArrayList<>(); - final int origFilterSize; if (queryFilter.getRequiredColumns().contains(outputColumnName)) { // outside filter contains unnested column - // requires check for OR - if (queryFilter instanceof OrFilter) { - origFilterSize = ((OrFilter) queryFilter).getFilters().size(); - for (Filter filter : ((OrFilter) queryFilter).getFilters()) { - if (filter.getRequiredColumns().contains(outputColumnName)) { - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( - filter, - inputColumn, - inputColumnCapabilites - ); - if (newFilter != null) { - preFilterList.add(newFilter); - } - } else { - preFilterList.add(filter); + // requires check for OR and And filters, disqualify rewrite for non-unnest filters + if (queryFilter instanceof BooleanFilter) { + boolean isTopLevelAndFilter = queryFilter instanceof AndFilter; + List preFilterList = recursiveRewriteOnUnnestFilters( + (BooleanFilter) queryFilter, + inputColumn, + inputColumnCapabilites, + filterSplitter, + isTopLevelAndFilter + ); + // If rewite on entire query filter is successful then add entire filter to preFilter else skip and only add to post filter. + if (filterSplitter.getPreFilterCount() == filterSplitter.getOriginalFilterCount()) { + if (queryFilter instanceof AndFilter) { + filterSplitter.addPreFilter(new AndFilter(preFilterList)); + } else if (queryFilter instanceof OrFilter) { + filterSplitter.addPreFilter(new OrFilter(preFilterList)); } } - if (preFilterList.size() == origFilterSize) { - // there has been successful rewrites - final OrFilter preOrFilter = new OrFilter(preFilterList); - filterSplitter.addPreFilter(preOrFilter); - } // add the entire query filter to unnest filter to be used in Value matcher filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true); } else { @@ -412,7 +358,173 @@ public class UnnestStorageAdapter implements StorageAdapter ); } + class FilterSplitter + { + private String inputColumn; + private ColumnCapabilities inputColumnCapabilites; + private VirtualColumns queryVirtualColumns; + private int originalFilterCount = 0; + private int preFilterCount = 0; + + public FilterSplitter( + String inputColumn, + ColumnCapabilities inputColumnCapabilites, + VirtualColumns queryVirtualColumns + ) + { + this.inputColumn = inputColumn; + this.inputColumnCapabilites = inputColumnCapabilites; + this.queryVirtualColumns = queryVirtualColumns; + } + + final List filtersPushedDownToBaseCursor = new ArrayList<>(); + final List filtersForPostUnnestCursor = new ArrayList<>(); + + void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) + { + if (filter == null) { + return; + } + if (!skipPreFilters) { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. + filtersPushedDownToBaseCursor.add(newFilter); + } + } + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + filtersForPostUnnestCursor.add(filter); + } + + void addPreFilter(@Nullable final Filter filter) + { + if (filter == null) { + return; + } + + final Set requiredColumns = filter.getRequiredColumns(); + + // Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call + // that perhaps forces the code to use a ValueMatcher where an index would've been available, + // which can have real performance implications. This is an interim choice made to value correctness + // over performance. When we need to optimize this performance, we should be able to + // create a VirtualColumnDatasource that contains all the virtual columns, in which case the query + // itself would stop carrying them and everything should be able to be pushed down. + if (queryVirtualColumns.getVirtualColumns().length > 0) { + for (String column : requiredColumns) { + if (queryVirtualColumns.exists(column)) { + filtersForPostUnnestCursor.add(filter); + return; + } + } + } + filtersPushedDownToBaseCursor.add(filter); + + } + + public void addToOriginalFilterCount(int c) + { + originalFilterCount += c; + } + + public void addToPreFilterCount(int c) + { + preFilterCount += c; + } + + public int getOriginalFilterCount() + { + return originalFilterCount; + } + + public int getPreFilterCount() + { + return preFilterCount; + } + } + + /** + * handles the nested rewrite for unnest columns in recursive way, + * it loops through all and/or filters and rewrite only required filters in the child and add it to preFilter if qualified + * or else skip adding it to preFilters. + * RULES: + * 1. Add to preFilters only when top level filter is AND. + * for example: a=1 and (b=2 or c=2) , In this case a=1 can be added as preFilters but we can not add b=2 as preFilters. + * 2. If Top level is OR filter then we can either choose to add entire top level OR filter to preFilter or skip it all together. + * for example: a=1 or (b=2 and c=2) + * 3. Filters on unnest column which is derived from Array or any other Expression can not be pushe down to base. + * for example: a=1 and vc=3 , lets say vc is ExpressionVirtualColumn, and vc=3 can not be push down to base even if top level is AND filter. + * A. Unnesting a single dimension e.g. select * from foo, UNNEST(MV_TO_ARRAY(dim3)) as u(d3) + * B. Unnesting an expression from multiple columns e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) + * In case A, d3 is a direct reference to dim3 so any filter using d3 can be rewritten using dim3 and added to pre filter + * while in case B, due to presence of the expression virtual column expressionVirtualColumn("j0.unnest", "array(\"dim1\",\"dim2\")", ColumnType.STRING_ARRAY) + * the filters on d12 cannot be pushed to the pre filters + * + * @param queryFilter query filter passed to makeCursors + * @param inputColumn input column to unnest if it's a direct access; otherwise null + * @param inputColumnCapabilites input column capabilities if known; otherwise null + */ + private List recursiveRewriteOnUnnestFilters( + BooleanFilter queryFilter, + final String inputColumn, + final ColumnCapabilities inputColumnCapabilites, + final FilterSplitter filterSplitter, + final boolean isTopLevelAndFilter + ) + { + final List preFilterList = new ArrayList<>(); + for (Filter filter : queryFilter.getFilters()) { + if (filter.getRequiredColumns().contains(outputColumnName)) { + if (filter instanceof AndFilter) { + preFilterList.add(new AndFilter(recursiveRewriteOnUnnestFilters( + (BooleanFilter) filter, + inputColumn, + inputColumnCapabilites, + filterSplitter, + isTopLevelAndFilter + ))); + } else if (filter instanceof OrFilter) { + // in case of Or Fiters, we set isTopLevelAndFilter to false that prevents pushing down any child filters to base + List orChildFilters = recursiveRewriteOnUnnestFilters( + (BooleanFilter) filter, + inputColumn, + inputColumnCapabilites, + filterSplitter, + false + ); + preFilterList.add(new OrFilter(orChildFilters)); + } else { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( + filter, + inputColumn, + inputColumnCapabilites + ); + if (newFilter != null) { + // this is making sure that we are not pushing the unnest columns filters to base filter without rewriting. + preFilterList.add(newFilter); + filterSplitter.addToPreFilterCount(1); + } + /* + Push down the filters to base only if top level is And Filter + we can not push down if top level filter is OR or unnestColumn is derived expression like arrays + */ + if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) { + filterSplitter.addPreFilter(newFilter != null ? newFilter : filter); + } + filterSplitter.addToOriginalFilterCount(1); + } + } else { + preFilterList.add(filter); + // for filters on non unnest columns, we still need to count the nested filters if any as we are not traversing it in this method + int filterCount = Filters.countNumberOfFilters(filter); + filterSplitter.addToOriginalFilterCount(filterCount); + filterSplitter.addToPreFilterCount(filterCount); + } + } + return preFilterList; + } /** * Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null. */ @@ -465,7 +577,6 @@ public class UnnestStorageAdapter implements StorageAdapter return false; } } - return true; } else if (filter instanceof NotFilter) { return filterMapsOverMultiValueStrings(((NotFilter) filter).getBaseFilter()); diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index c522f8b4aa7..14c2e2f770c 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.filter.BooleanFilter; import org.apache.druid.query.filter.ColumnIndexSelector; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -422,4 +423,19 @@ public class Filters return retVal; } + + public static int countNumberOfFilters(@Nullable Filter filter) + { + if (filter == null) { + return 0; + } + if (filter instanceof BooleanFilter) { + return ((BooleanFilter) filter).getFilters() + .stream() + .map(f -> countNumberOfFilters(f)) + .mapToInt(Integer::intValue) + .sum(); + } + return 1; + } } diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 480c86f51fd..4257ac4c072 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -20,8 +20,8 @@ package org.apache.druid.segment; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; @@ -29,26 +29,13 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.filter.BoundDimFilter; -import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.filter.LikeDimFilter; -import org.apache.druid.query.filter.NullFilter; -import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.AndFilter; -import org.apache.druid.segment.filter.BoundFilter; -import org.apache.druid.segment.filter.ColumnComparisonFilter; -import org.apache.druid.segment.filter.FalseFilter; -import org.apache.druid.segment.filter.LikeFilter; -import org.apache.druid.segment.filter.NotFilter; import org.apache.druid.segment.filter.OrFilter; import org.apache.druid.segment.filter.SelectorFilter; -import org.apache.druid.segment.filter.TrueFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; @@ -72,6 +59,10 @@ import javax.annotation.Nullable; import java.util.Arrays; import java.util.List; +import static org.apache.druid.segment.filter.FilterTestUtils.selector; +import static org.apache.druid.segment.filter.Filters.and; +import static org.apache.druid.segment.filter.Filters.or; + public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { private static Closer CLOSER; @@ -286,13 +277,13 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final OrFilter baseFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), - new SelectorDimFilter(inputColumn, "2", null).toFilter() + selector(OUTPUT_COLUMN_NAME, "1"), + selector(inputColumn, "2") )); final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(inputColumn, "1", null).toFilter(), - new SelectorDimFilter(inputColumn, "2", null).toFilter() + selector(inputColumn, "1"), + selector(inputColumn, "2") )); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -316,14 +307,182 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest return null; }); } + @Test + public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters() + { + final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( + new TestStorageAdapter(INCREMENTAL_INDEX), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + null + ); + final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); + + final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); + + final OrFilter baseFilter = new OrFilter(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "1"), + new AndFilter(ImmutableList.of( + selector(inputColumn, "2"), + selector(OUTPUT_COLUMN_NAME, "10") + )) + )); + + final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( + selector(inputColumn, "1"), + new AndFilter(ImmutableList.of( + selector(inputColumn, "2"), + selector(inputColumn, "10") + )) + )); + + final Sequence cursorSequence = unnestStorageAdapter.makeCursors( + baseFilter, + unnestStorageAdapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + final TestStorageAdapter base = (TestStorageAdapter) unnestStorageAdapter.getBaseAdapter(); + final Filter pushDownFilter = base.getPushDownFilter(); + + Assert.assertEquals(expectedPushDownFilter, pushDownFilter); + cursorSequence.accumulate(null, (accumulated, cursor) -> { + Assert.assertEquals(cursor.getClass(), PostJoinCursor.class); + final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter(); + // OR-case so base filter should match the postJoinFilter + Assert.assertEquals(baseFilter, postFilter); + return null; + }); + } + @Test + public void test_nested_filters_unnested_and_topLevel1And3filtersInOR() + { + final Filter testQueryFilter = and(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || multi-string1 = 1))", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || unnested-multi-string1 = 1))" + ); + } + @Test + public void test_nested_multiLevel_filters_unnested() + { + final Filter testQueryFilter = and(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + and(ImmutableList.of( + selector("newcol", "3"), + selector(COLUMNNAME, "7") + )) + )), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || multi-string1 = 1))", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))" + ); + } + @Test + public void test_nested_multiLevel_filters_unnested5Level() + { + final Filter testQueryFilter = or(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + and(ImmutableList.of( + selector("newcol", "3"), + and(ImmutableList.of( + selector(COLUMNNAME, "7"), + selector("newcol_1", "10") + )) + )) + )), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || multi-string1 = 1)", + "(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 = 1)" + ); + } + @Test + public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() + { + final Filter testQueryFilter = or(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + and(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2"), + selector(OUTPUT_COLUMN_NAME, "1") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && multi-string1 = 1))", + "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))" + ); + } + + @Test + public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs() + { + final Filter testQueryFilter = and(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + or(ImmutableList.of( + selector("newcol", "2"), + selector(COLUMNNAME, "2") + )), + or(ImmutableList.of( + selector("newcol", "4"), + selector(COLUMNNAME, "8"), + selector(OUTPUT_COLUMN_NAME, "6") + )) + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || multi-string1 = 6))", + "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))" + ); + } + + @Test + public void test_nested_filters_unnested_and_topLevelAND2sdf() + { + final Filter testQueryFilter = and(ImmutableList.of( + selector(OUTPUT_COLUMN_NAME, "3"), + selector(COLUMNNAME, "2") + )); + testComputeBaseAndPostUnnestFilters( + testQueryFilter, + "(multi-string1 = 3 && multi-string1 = 2)", + "(unnested-multi-string1 = 3 && multi-string1 = 2)" + ); + } @Test public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) ); final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); @@ -331,7 +490,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - new SelectorDimFilter(inputColumn, "1", null).toFilter(); + selector(inputColumn, "1"); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( @@ -377,10 +536,9 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); final Filter expectedPushDownFilter = - new SelectorDimFilter(inputColumn, "1", null).toFilter(); + selector(inputColumn, "1"); - - final Filter queryFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(); + final Filter queryFilter = new SelectorFilter(OUTPUT_COLUMN_NAME, "1", null); final Sequence cursorSequence = unnestStorageAdapter.makeCursors( queryFilter, unnestStorageAdapter.getInterval(), @@ -409,45 +567,32 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest }); } - @Test - public void testAllowedFiltersForPushdown() + public void testComputeBaseAndPostUnnestFilters( + Filter testQueryFilter, + String expectedBasePushDown, + String expectedPostUnnest + ) { - Filter[] allowed = new Filter[] { - new SelectorFilter("column", "value"), - new InDimFilter("column", ImmutableSet.of("a", "b")), - new LikeFilter("column", null, LikeDimFilter.LikeMatcher.from("hello%", null), null), - new BoundFilter( - new BoundDimFilter("column", "a", "b", true, true, null, null, null) - ), - NullFilter.forColumn("column"), - new EqualityFilter("column", ColumnType.LONG, 1234L, null), - new RangeFilter("column", ColumnType.LONG, 0L, 1234L, true, false, null) - }; - // not exhaustive - Filter[] notAllowed = new Filter[] { - TrueFilter.instance(), - FalseFilter.instance(), - new ColumnComparisonFilter(ImmutableList.of(DefaultDimensionSpec.of("col1"), DefaultDimensionSpec.of("col2"))) - }; - - for (Filter f : allowed) { - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f)); - } - for (Filter f : notAllowed) { - Assert.assertFalse(UnnestStorageAdapter.filterMapsOverMultiValueStrings(f)); - } - - Filter notAnd = new NotFilter( - new AndFilter( - Arrays.asList(allowed) - ) + final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(UNNEST_STORAGE_ADAPTER.getUnnestColumn()); + final VirtualColumn vc = UNNEST_STORAGE_ADAPTER.getUnnestColumn(); + Pair filterPair = UNNEST_STORAGE_ADAPTER.computeBaseAndPostUnnestFilters( + testQueryFilter, + null, + VirtualColumns.EMPTY, + inputColumn, + vc.capabilities(UNNEST_STORAGE_ADAPTER, inputColumn) ); - - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(notAnd)); - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new OrFilter(Arrays.asList(allowed)))); - Assert.assertTrue(UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(notAnd))); - Assert.assertFalse( - UnnestStorageAdapter.filterMapsOverMultiValueStrings(new NotFilter(new OrFilter(Arrays.asList(notAllowed)))) + Filter actualPushDownFilter = filterPair.lhs; + Filter actualPostUnnestFilter = filterPair.rhs; + Assert.assertEquals( + "Expects only top level child of And Filter to push down to base", + expectedBasePushDown, + actualPushDownFilter == null ? "" : actualPushDownFilter.toString() + ); + Assert.assertEquals( + "Should have post unnest filter", + expectedPostUnnest, + actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString() ); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index f5ea4ea6098..9f25f4cd00f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2880,6 +2880,253 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestThriceWithFiltersOnDimAndUnnestCol() + { + cannotVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND dim3_unnest1='Baz'"; + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + and( + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) + ) + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), null + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .virtualColumns(expressionVirtualColumn( + "v0", + "'Baz'", + ColumnType.STRING + )) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, + ImmutableList.of( + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Hello"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Hello"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Baz"}, + new Object[]{"27", "Baz", "Baz", "Hello"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Baz"}, + new Object[]{"27", "Baz", "Hello", "Hello"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Baz"}, + new Object[]{"27", "Baz", "World", "Hello"}, + new Object[]{"27", "Baz", "World", "World"} + ) + ); + } + @Test + public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns() + { + cannotVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND dim3_unnest1='Baz' AND dim3_unnest2='Hello' AND dim3_unnest3='World'"; + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + and( + NullHandling.sqlCompatible() + ? equality("dimZipf", "27", ColumnType.LONG) + : bound("dimZipf", "27", "27", false, false, null, StringComparators.NUMERIC), + equality("j0.unnest", "Baz", ColumnType.STRING) + ) + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), equality("_j0.unnest", "Hello", ColumnType.STRING) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + equality("__j0.unnest", "World", ColumnType.STRING) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns(expressionVirtualColumn( + "v0", + "'Baz'", + ColumnType.STRING + )) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "v0")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, + ImmutableList.of( + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "Hello", "World"} + ) + ); + } + + @Test + public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations() + { + cannotVectorize(); + skipVectorize(); + String sql = " SELECT dimZipf, dim3_unnest1, dim3_unnest2, dim3_unnest3 FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM lotsocolumns, UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest1) )" + + " ,UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest2) \n" + + " ), UNNEST(MV_TO_ARRAY(dimMultivalEnumerated)) as ut(dim3_unnest3) " + + " WHERE dimZipf=27 AND (dim3_unnest1='Baz' OR dim3_unnest2='Hello') AND dim3_unnest3='World'"; + List> expectedQuerySqlCom = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE5), + expressionVirtualColumn( + "j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + NullHandling.sqlCompatible() ? equality("dimZipf", "27", ColumnType.LONG) : range( + "dimZipf", + ColumnType.LONG, + "27", + "27", + false, + false + ) + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + null + ), + or( + equality("j0.unnest", "Baz", ColumnType.STRING), + equality("_j0.unnest", "Hello", ColumnType.STRING) + ) // (j0.unnest = Baz || _j0.unnest = Hello) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"dimMultivalEnumerated\"", + ColumnType.STRING + ), + equality("__j0.unnest", "World", ColumnType.STRING) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "dimZipf", "j0.unnest")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, expectedQuerySqlCom, + ImmutableList.of( + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Baz", "World"}, + new Object[]{"27", "Baz", "Hello", "World"}, + new Object[]{"27", "Baz", "World", "World"}, + new Object[]{"27", "Hello", "Hello", "World"}, + new Object[]{"27", "World", "Hello", "World"} + ) + ); + } @Test public void testUnnestWithGroupBy() { @@ -3148,6 +3395,95 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestVirtualWithColumns1() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' and (m1<=10 or strings='b'))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of(Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + equality("j0.unnest", "a", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .filters(or( + NullHandling.sqlCompatible() + ? range("m1", ColumnType.LONG, null, "10", false, false) + : bound( + "m1", + null, + "10", + false, + false, + null, + StringComparators.NUMERIC + ), + equality("j0.unnest", "b", ColumnType.STRING) + )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest", "m1")) + .build()), + ImmutableList.of(new Object[]{"a", 1.0f}) + ); + } + + @Test + public void testUnnestVirtualWithColumns2() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT strings, m1 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (strings) where (strings='a' or (m1=2 and strings='b'))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of(Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) // (j0.unnest = a || (m1 = 2 && j0.unnest = b)) + .filters(or( + equality("j0.unnest", "a", ColumnType.STRING), + and( + NullHandling.sqlCompatible() + ? equality("m1", "2", ColumnType.FLOAT) + : equality("m1", "2", ColumnType.STRING), + equality("j0.unnest", "b", ColumnType.STRING) + ) + )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest", "m1")) + .build()), + ImmutableList.of( + new Object[]{"a", 1.0f}, + new Object[]{"b", 2.0f} + ) + ); + } @Test public void testUnnestWithFilters() {