From 1fc8fb1b20080ae8c515dcbaa8f5d95f87f7a63e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 9 Oct 2023 06:16:06 -0700 Subject: [PATCH] add a bunch of tests with array typed columns to CalciteArraysQueryTest (#15101) * add a bunch of tests with array typed columns to CalciteArraysQueryTest * fix a bug with unnest filter pushdown when filtering on unnested array columns --- .../druid/msq/test/CalciteMSQTestsHelper.java | 37 +- .../druid/segment/UnnestStorageAdapter.java | 31 +- .../segment/UnnestStorageAdapterTest.java | 190 +- .../sql/calcite/CalciteArraysQueryTest.java | 1779 ++++++++++++++++- .../sql/calcite/util/TestDataBuilder.java | 2 +- 5 files changed, 1962 insertions(+), 77 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java index c68b2331c7d..5b49c649cc0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java @@ -27,6 +27,7 @@ import com.google.inject.Module; import com.google.inject.TypeLiteral; import org.apache.druid.collections.ReferenceCountingResourceHolder; import org.apache.druid.collections.ResourceHolder; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; @@ -47,6 +48,7 @@ import org.apache.druid.msq.guice.MSQIndexingModule; import org.apache.druid.msq.querykit.DataSegmentProvider; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.ForwardingQueryProcessingPool; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryProcessingPool; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; @@ -74,7 +76,9 @@ import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFacto import org.apache.druid.server.SegmentManager; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; +import org.apache.druid.sql.calcite.CalciteArraysQueryTest; import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.TestDataBuilder; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.easymock.EasyMock; @@ -83,7 +87,6 @@ import org.junit.rules.TemporaryFolder; import org.mockito.Mockito; import javax.annotation.Nullable; -import java.io.File; import java.io.IOException; import java.util.List; import java.util.Set; @@ -232,7 +235,7 @@ public class CalciteMSQTestsHelper .build(); index = IndexBuilder .create() - .tmpDir(new File(temporaryFolder.newFolder(), "1")) + .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema(foo1Schema) .rows(ROWS1) @@ -259,7 +262,7 @@ public class CalciteMSQTestsHelper .build(); index = IndexBuilder .create() - .tmpDir(new File(temporaryFolder.newFolder(), "2")) + .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema(indexSchemaDifferentDim3M1Types) .rows(ROWS2) @@ -269,7 +272,7 @@ public class CalciteMSQTestsHelper case CalciteTests.BROADCAST_DATASOURCE: index = IndexBuilder .create() - .tmpDir(new File(temporaryFolder.newFolder(), "3")) + .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema(INDEX_SCHEMA_NUMERIC_DIMS) .rows(ROWS1_WITH_NUMERIC_DIMS) @@ -278,12 +281,36 @@ public class CalciteMSQTestsHelper case DATASOURCE5: index = IndexBuilder .create() - .tmpDir(new File(temporaryFolder.newFolder(), "5")) + .tmpDir(temporaryFolder.newFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema(INDEX_SCHEMA_LOTS_O_COLUMNS) .rows(ROWS_LOTS_OF_COLUMNS) .buildMMappedIndex(); break; + case CalciteArraysQueryTest.DATA_SOURCE_ARRAYS: + index = IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) + .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withRollup(false) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ) + ) + .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) + .inputTmpDir(temporaryFolder.newFolder()) + .buildMMappedIndex(); + break; default: throw new ISE("Cannot query segment %s in test runner", segmentId); diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 02f8c0064aa..e9839a37818 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -322,19 +322,17 @@ public class UnnestStorageAdapter implements StorageAdapter // outside filter contains unnested column // requires check for OR and And filters, disqualify rewrite for non-unnest filters if (queryFilter instanceof BooleanFilter) { - boolean isTopLevelAndFilter = queryFilter instanceof AndFilter; List preFilterList = recursiveRewriteOnUnnestFilters( (BooleanFilter) queryFilter, inputColumn, inputColumnCapabilites, - filterSplitter, - isTopLevelAndFilter + filterSplitter ); // If rewite on entire query filter is successful then add entire filter to preFilter else skip and only add to post filter. - if (filterSplitter.getPreFilterCount() == filterSplitter.getOriginalFilterCount()) { + if (!preFilterList.isEmpty()) { if (queryFilter instanceof AndFilter) { filterSplitter.addPreFilter(new AndFilter(preFilterList)); - } else if (queryFilter instanceof OrFilter) { + } else if (queryFilter instanceof OrFilter && filterSplitter.getPreFilterCount() == filterSplitter.getOriginalFilterCount()) { filterSplitter.addPreFilter(new OrFilter(preFilterList)); } } @@ -470,8 +468,7 @@ public class UnnestStorageAdapter implements StorageAdapter BooleanFilter queryFilter, final String inputColumn, final ColumnCapabilities inputColumnCapabilites, - final FilterSplitter filterSplitter, - final boolean isTopLevelAndFilter + final FilterSplitter filterSplitter ) { final List preFilterList = new ArrayList<>(); @@ -482,25 +479,26 @@ public class UnnestStorageAdapter implements StorageAdapter (BooleanFilter) filter, inputColumn, inputColumnCapabilites, - filterSplitter, - isTopLevelAndFilter + filterSplitter ); if (!andChildFilters.isEmpty()) { preFilterList.add(new AndFilter(andChildFilters)); } } else if (filter instanceof OrFilter) { - // in case of Or Fiters, we set isTopLevelAndFilter to false that prevents pushing down any child filters to base List orChildFilters = recursiveRewriteOnUnnestFilters( (BooleanFilter) filter, inputColumn, inputColumnCapabilites, - filterSplitter, - false + filterSplitter ); - preFilterList.add(new OrFilter(orChildFilters)); + if (orChildFilters.size() == ((OrFilter) filter).getFilters().size()) { + preFilterList.add(new OrFilter(orChildFilters)); + } } else if (filter instanceof NotFilter) { + // nothing to do here... continue; } else { + // can we rewrite final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( filter, inputColumn, @@ -511,13 +509,6 @@ public class UnnestStorageAdapter implements StorageAdapter preFilterList.add(newFilter); filterSplitter.addToPreFilterCount(1); } - /* - Push down the filters to base only if top level is And Filter - we can not push down if top level filter is OR or unnestColumn is derived expression like arrays - */ - if (isTopLevelAndFilter && getUnnestInputIfDirectAccess(unnestColumn) != null) { - filterSplitter.addPreFilter(newFilter != null ? newFilter : filter); - } filterSplitter.addToOriginalFilterCount(1); } } else { diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 2139335b594..286a636e89a 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -20,6 +20,8 @@ package org.apache.druid.segment; import com.google.common.collect.ImmutableList; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; @@ -27,11 +29,14 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.OrFilter; @@ -40,8 +45,10 @@ import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; import org.apache.druid.segment.join.PostJoinCursor; +import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; @@ -53,9 +60,12 @@ import org.joda.time.Interval; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; +import org.junit.ClassRule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import javax.annotation.Nullable; +import java.io.IOException; import java.util.Arrays; import java.util.List; @@ -66,18 +76,23 @@ import static org.apache.druid.segment.filter.Filters.or; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { + @ClassRule + public static TemporaryFolder tmp = new TemporaryFolder(); private static Closer CLOSER; private static IncrementalIndex INCREMENTAL_INDEX; private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; + private static QueryableIndex QUERYABLE_INDEX; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1; + private static UnnestStorageAdapter UNNEST_ARRAYS; private static List ADAPTERS; - private static String COLUMNNAME = "multi-string1"; + private static String INPUT_COLUMN_NAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; + @BeforeClass - public static void setup() + public static void setup() throws IOException { CLOSER = Closer.create(); final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); @@ -98,13 +113,40 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), null ); UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), + null + ); + + final InputSource inputSource = ResourceInputSource.of( + UnnestStorageAdapterTest.class.getClassLoader(), + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE + ); + IndexBuilder bob = IndexBuilder.create() + .tmpDir(tmp.newFolder()) + .schema( + IncrementalIndexSchema.builder() + .withTimestampSpec(NestedDataTestUtils.TIMESTAMP_SPEC) + .withDimensionsSpec(NestedDataTestUtils.AUTO_DISCOVERY) + .withQueryGranularity(Granularities.DAY) + .withRollup(false) + .withMinTimestamp(0) + .build() + ) + .indexSpec(IndexSpec.DEFAULT) + .inputSource(inputSource) + .inputFormat(NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT) + .transform(TransformSpec.NONE) + .inputTmpDir(tmp.newFolder()); + QUERYABLE_INDEX = CLOSER.register(bob.buildMMappedIndex()); + UNNEST_ARRAYS = new UnnestStorageAdapter( + new QueryableIndexStorageAdapter(QUERYABLE_INDEX), + new ExpressionVirtualColumn("u", "\"arrayLongNulls\"", ColumnType.LONG, ExprMacroTable.nil()), null ); @@ -269,7 +311,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), null ); @@ -313,7 +355,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), null ); @@ -365,7 +407,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest selector(OUTPUT_COLUMN_NAME, "3"), or(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2"), + selector(INPUT_COLUMN_NAME, "2"), selector(OUTPUT_COLUMN_NAME, "1") )) )); @@ -383,10 +425,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest or(ImmutableList.of( or(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2"), + selector(INPUT_COLUMN_NAME, "2"), and(ImmutableList.of( selector("newcol", "3"), - selector(COLUMNNAME, "7") + selector(INPUT_COLUMN_NAME, "7") )) )), selector(OUTPUT_COLUMN_NAME, "1") @@ -406,11 +448,11 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest or(ImmutableList.of( or(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2"), + selector(INPUT_COLUMN_NAME, "2"), and(ImmutableList.of( selector("newcol", "3"), and(ImmutableList.of( - selector(COLUMNNAME, "7"), + selector(INPUT_COLUMN_NAME, "7"), selector("newcol_1", "10") )) )) @@ -431,7 +473,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest selector(OUTPUT_COLUMN_NAME, "3"), and(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2"), + selector(INPUT_COLUMN_NAME, "2"), selector(OUTPUT_COLUMN_NAME, "1") )) )); @@ -449,11 +491,11 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest selector(OUTPUT_COLUMN_NAME, "3"), or(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2") + selector(INPUT_COLUMN_NAME, "2") )), or(ImmutableList.of( selector("newcol", "4"), - selector(COLUMNNAME, "8"), + selector(INPUT_COLUMN_NAME, "8"), selector(OUTPUT_COLUMN_NAME, "6") )) )); @@ -469,7 +511,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { final Filter testQueryFilter = and(ImmutableList.of( not(selector(OUTPUT_COLUMN_NAME, "3")), - selector(COLUMNNAME, "2") + selector(INPUT_COLUMN_NAME, "2") )); testComputeBaseAndPostUnnestFilters( testQueryFilter, @@ -483,7 +525,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { final Filter testQueryFilter = or(ImmutableList.of( not(selector(OUTPUT_COLUMN_NAME, "3")), - selector(COLUMNNAME, "2") + selector(INPUT_COLUMN_NAME, "2") )); testComputeBaseAndPostUnnestFilters( testQueryFilter, @@ -500,10 +542,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest or(ImmutableList.of( or(ImmutableList.of( selector("newcol", "2"), - selector(COLUMNNAME, "2"), + selector(INPUT_COLUMN_NAME, "2"), and(ImmutableList.of( selector("newcol", "3"), - selector(COLUMNNAME, "7") + selector(INPUT_COLUMN_NAME, "7") )) )), selector(OUTPUT_COLUMN_NAME, "1") @@ -516,12 +558,97 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest ); } + @Test + public void testPartialArrayPushdown() + { + final Filter testQueryFilter = and( + ImmutableList.of( + new EqualityFilter("u", ColumnType.LONG, 1L, null), + new EqualityFilter("str", ColumnType.STRING, "a", null), + new EqualityFilter("long", ColumnType.LONG, 1L, null) + ) + ); + testComputeBaseAndPostUnnestFilters( + UNNEST_ARRAYS, + testQueryFilter, + "(str = a && long = 1 (LONG))", + "(u = 1 (LONG) && str = a && long = 1 (LONG))" + ); + } + + @Test + public void testPartialArrayPushdownNested() + { + final Filter testQueryFilter = and( + ImmutableList.of( + and( + ImmutableList.of( + new EqualityFilter("u", ColumnType.LONG, 1L, null), + new EqualityFilter("str", ColumnType.STRING, "a", null) + ) + ), + new EqualityFilter("long", ColumnType.LONG, 1L, null) + ) + ); + // this seems wrong since we should be able to push down str = a and long = 1 + testComputeBaseAndPostUnnestFilters( + UNNEST_ARRAYS, + testQueryFilter, + "(str = a && long = 1 (LONG))", + "(u = 1 (LONG) && str = a && long = 1 (LONG))" + ); + } + + @Test + public void testPartialArrayPushdown2() + { + final Filter testQueryFilter = and( + ImmutableList.of( + or( + ImmutableList.of( + new EqualityFilter("u", ColumnType.LONG, 1L, null), + new EqualityFilter("str", ColumnType.STRING, "a", null) + ) + ), + new EqualityFilter("long", ColumnType.LONG, 1L, null) + ) + ); + testComputeBaseAndPostUnnestFilters( + UNNEST_ARRAYS, + testQueryFilter, + "long = 1 (LONG)", + "((u = 1 (LONG) || str = a) && long = 1 (LONG))" + ); + } + + @Test + public void testArrayCannotPushdown2() + { + final Filter testQueryFilter = or( + ImmutableList.of( + or( + ImmutableList.of( + new EqualityFilter("u", ColumnType.LONG, 1L, null), + new EqualityFilter("str", ColumnType.STRING, "a", null) + ) + ), + new EqualityFilter("long", ColumnType.LONG, 1L, null) + ) + ); + testComputeBaseAndPostUnnestFilters( + UNNEST_ARRAYS, + testQueryFilter, + "", + "(u = 1 (LONG) || str = a || long = 1 (LONG))" + ); + } + @Test public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) ); @@ -567,7 +694,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest { final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( new TestStorageAdapter(INCREMENTAL_INDEX), - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + INPUT_COLUMN_NAME + "\"", null, ExprMacroTable.nil()), null ); @@ -613,14 +740,29 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest String expectedPostUnnest ) { - final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(UNNEST_STORAGE_ADAPTER.getUnnestColumn()); - final VirtualColumn vc = UNNEST_STORAGE_ADAPTER.getUnnestColumn(); - Pair filterPair = UNNEST_STORAGE_ADAPTER.computeBaseAndPostUnnestFilters( + testComputeBaseAndPostUnnestFilters( + UNNEST_STORAGE_ADAPTER, + testQueryFilter, + expectedBasePushDown, + expectedPostUnnest + ); + } + + public void testComputeBaseAndPostUnnestFilters( + UnnestStorageAdapter adapter, + Filter testQueryFilter, + String expectedBasePushDown, + String expectedPostUnnest + ) + { + final String inputColumn = adapter.getUnnestInputIfDirectAccess(adapter.getUnnestColumn()); + final VirtualColumn vc = adapter.getUnnestColumn(); + Pair filterPair = adapter.computeBaseAndPostUnnestFilters( testQueryFilter, null, VirtualColumns.EMPTY, inputColumn, - vc.capabilities(UNNEST_STORAGE_ADAPTER, inputColumn) + vc.capabilities(adapter, inputColumn) ); Filter actualPushDownFilter = filterPair.lhs; Filter actualPostUnnestFilter = filterPair.rhs; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index e6a669b9c28..e70224274b1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -22,19 +22,27 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.ResourceInputSource; +import org.apache.druid.guice.DruidInjectorBuilder; +import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.DataSource; import org.apache.druid.query.Druids; import org.apache.druid.query.FilteredDataSource; +import org.apache.druid.query.FrameBasedInlineDataSource; import org.apache.druid.query.InlineDataSource; import org.apache.druid.query.LookupDataSource; +import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.TableDataSource; import org.apache.druid.query.UnnestDataSource; import org.apache.druid.query.aggregation.CountAggregatorFactory; @@ -54,18 +62,35 @@ import org.apache.druid.query.groupby.having.DimFilterHavingSpec; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.NoopLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.query.topn.DimensionTopNMetricSpec; import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.FrameBasedInlineSegmentWrangler; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.InlineSegmentWrangler; +import org.apache.druid.segment.LookupSegmentWrangler; +import org.apache.druid.segment.MapSegmentWrangler; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.SegmentWrangler; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinType; +import org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.server.QueryStackTests; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; +import org.apache.druid.sql.calcite.util.TestDataBuilder; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; import org.junit.Assert; import org.junit.Test; +import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -82,6 +107,154 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest .put(QueryContexts.CTX_SQL_STRINGIFY_ARRAYS, false) .build(); + + public static final String DATA_SOURCE_ARRAYS = "arrays"; + + public static void assertResultsDeepEquals(String sql, List expected, List results) + { + for (int row = 0; row < results.size(); row++) { + for (int col = 0; col < results.get(row).length; col++) { + final String rowString = StringUtils.format("result #%d: %s", row + 1, sql); + assertDeepEquals(rowString + " - column: " + col + ":", expected.get(row)[col], results.get(row)[col]); + } + } + } + + public static void assertDeepEquals(String path, Object expected, Object actual) + { + if (expected instanceof List && actual instanceof List) { + List expectedList = (List) expected; + List actualList = (List) actual; + Assert.assertEquals(path + " arrays length mismatch", expectedList.size(), actualList.size()); + for (int i = 0; i < expectedList.size(); i++) { + assertDeepEquals(path + "[" + i + "]", expectedList.get(i), actualList.get(i)); + } + } else { + Assert.assertEquals(path, expected, actual); + } + } + + @Override + public void configureGuice(DruidInjectorBuilder builder) + { + super.configureGuice(builder); + builder.addModule(new NestedDataModule()); + } + + @SuppressWarnings("resource") + @Override + public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( + final QueryRunnerFactoryConglomerate conglomerate, + final JoinableFactoryWrapper joinableFactory, + final Injector injector + ) throws IOException + { + NestedDataModule.registerHandlersAndSerde(); + + final QueryableIndex foo = IndexBuilder + .create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(TestDataBuilder.INDEX_SCHEMA) + .rows(TestDataBuilder.ROWS1) + .buildMMappedIndex(); + + final QueryableIndex numfoo = IndexBuilder + .create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(TestDataBuilder.INDEX_SCHEMA_NUMERIC_DIMS) + .rows(TestDataBuilder.ROWS1_WITH_NUMERIC_DIMS) + .buildMMappedIndex(); + + final QueryableIndex indexLotsOfColumns = IndexBuilder + .create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(TestDataBuilder.INDEX_SCHEMA_LOTS_O_COLUMNS) + .rows(TestDataBuilder.ROWS_LOTS_OF_COLUMNS) + .buildMMappedIndex(); + + final QueryableIndex indexArrays = + IndexBuilder.create() + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) + .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withRollup(false) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ) + ) + .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) + .inputTmpDir(temporaryFolder.newFolder()) + .buildMMappedIndex(); + + SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker( + conglomerate, + new MapSegmentWrangler( + ImmutableMap., SegmentWrangler>builder() + .put(InlineDataSource.class, new InlineSegmentWrangler()) + .put(FrameBasedInlineDataSource.class, new FrameBasedInlineSegmentWrangler()) + .put( + LookupDataSource.class, + new LookupSegmentWrangler(injector.getInstance(LookupExtractorFactoryContainerProvider.class)) + ) + .build() + ), + joinableFactory, + QueryStackTests.DEFAULT_NOOP_SCHEDULER + ); + walker.add( + DataSegment.builder() + .dataSource(CalciteTests.DATASOURCE1) + .interval(foo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + foo + ).add( + DataSegment.builder() + .dataSource(CalciteTests.DATASOURCE3) + .interval(numfoo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + numfoo + ).add( + DataSegment.builder() + .dataSource(CalciteTests.DATASOURCE5) + .interval(indexLotsOfColumns.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + indexLotsOfColumns + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_ARRAYS) + .version("1") + .interval(indexArrays.getDataInterval()) + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexArrays + ); + + return walker; + } + // test some query stuffs, sort of limited since no native array column types so either need to use constructor or // array aggregator @Test @@ -135,6 +308,36 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testGroupByArrayColumnFromCase() + { + cannotVectorize(); + testQuery( + "SELECT CASE WHEN arrayStringNulls = ARRAY['a', 'b'] THEN arrayLongNulls END as arr, count(1) from arrays GROUP BY 1", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setVirtualColumns(expressionVirtualColumn( + "v0", + "case_searched((\"arrayStringNulls\" == array('a','b')),\"arrayLongNulls\",null)", + ColumnType.LONG_ARRAY + )) + .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.LONG_ARRAY)) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new CountAggregatorFactory("a0")) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{null, 11L}, + new Object[]{Arrays.asList(1L, null, 3L), 1L}, + new Object[]{Arrays.asList(2L, 3L), 2L} + ) + ); + } + @Test public void testSelectNonConstantArrayExpressionFromTable() { @@ -206,9 +409,6 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest @Test public void testSomeArrayFunctionsWithScanQuery() { - // Yes these outputs are strange sometimes, arrays are in a partial state of existence so end up a bit - // stringy for now this is because virtual column selectors are coercing values back to stringish so that - // multi-valued string dimensions can be grouped on. List expectedResults; if (useDefault) { expectedResults = ImmutableList.of( @@ -380,6 +580,136 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testSomeArrayFunctionsWithScanQueryArrayColumns() + { + List expectedResults; + if (useDefault) { + expectedResults = ImmutableList.of( + new Object[]{null, "[]", null, null, null, "[1]", "[2]", null, null, null, "[1,2,3]", null, "", null, null, "", null, null}, + new Object[]{"[\"a\",\"b\"]", "[2,3]", "[null]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[2,3,1]", "[2,2,3]", "[null,1.1]", "[2.2,null]", null, null, null, "a", 2L, 0.0D, "a", 2L, 0.0D}, + new Object[]{"[\"b\",\"b\"]", "[1]", null, "[\"b\",\"b\",\"foo\"]", "[\"foo\",\"b\",\"b\"]", "[1,1]", "[2,1]", null, null, "[\"d\",\"e\",\"b\",\"b\"]", "[1,4,1]", null, "b", 1L, null, "b", 1L, null}, + new Object[]{null, "[null,2,9]", "[999.0,5.5,null]", null, null, "[null,2,9,1]", "[2,null,2,9]", "[999.0,5.5,null,1.1]", "[2.2,999.0,5.5,null]", null, null, null, "", 0L, 999.0D, "", 0L, 999.0D}, + new Object[]{"[\"a\",\"b\"]", "[1,null,3]", "[1.1,2.2,null]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[1,null,3,1]", "[2,1,null,3]", "[1.1,2.2,null,1.1]", "[2.2,1.1,2.2,null]", "[\"a\",\"b\",\"a\",\"b\"]", "[1,2,3,1,null,3]", "[1.1,2.2,3.3,1.1,2.2,null]", "a", 1L, 1.1D, "a", 1L, 1.1D}, + new Object[]{"[\"d\",null,\"b\"]", "[1,2,3]", "[null,2.2,null]", "[\"d\",null,\"b\",\"foo\"]", "[\"foo\",\"d\",null,\"b\"]", "[1,2,3,1]", "[2,1,2,3]", "[null,2.2,null,1.1]", "[2.2,null,2.2,null]", "[\"b\",\"c\",\"d\",null,\"b\"]", "[1,2,3,4,1,2,3]", "[1.1,3.3,null,2.2,null]", "d", 1L, 0.0D, "d", 1L, 0.0D}, + new Object[]{"[null,\"b\"]", null, "[999.0,null,5.5]", "[null,\"b\",\"foo\"]", "[\"foo\",null,\"b\"]", null, null, "[999.0,null,5.5,1.1]", "[2.2,999.0,null,5.5]", "[\"a\",\"b\",\"c\",null,\"b\"]", null, "[3.3,4.4,5.5,999.0,null,5.5]", "", null, 999.0D, "", null, 999.0D}, + new Object[]{null, null, "[]", null, null, null, null, "[1.1]", "[2.2]", null, null, "[1.1,2.2,3.3]", "", null, null, "", null, null}, + new Object[]{"[\"a\",\"b\"]", "[2,3]", "[null,1.1]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[2,3,1]", "[2,2,3]", "[null,1.1,1.1]", "[2.2,null,1.1]", null, null, null, "a", 2L, 0.0D, "a", 2L, 0.0D}, + new Object[]{"[\"b\",\"b\"]", "[null]", null, "[\"b\",\"b\",\"foo\"]", "[\"foo\",\"b\",\"b\"]", "[null,1]", "[2,null]", null, null, "[\"d\",\"e\",\"b\",\"b\"]", "[1,4,null]", null, "b", 0L, null, "b", 0L, null}, + new Object[]{"[null]", "[null,2,9]", "[999.0,5.5,null]", "[null,\"foo\"]", "[\"foo\",null]", "[null,2,9,1]", "[2,null,2,9]", "[999.0,5.5,null,1.1]", "[2.2,999.0,5.5,null]", "[\"a\",\"b\",null]", null, null, "", 0L, 999.0D, "", 0L, 999.0D}, + new Object[]{"[]", "[1,null,3]", "[1.1,2.2,null]", "[\"foo\"]", "[\"foo\"]", "[1,null,3,1]", "[2,1,null,3]", "[1.1,2.2,null,1.1]", "[2.2,1.1,2.2,null]", "[\"a\",\"b\"]", "[1,2,3,1,null,3]", "[1.1,2.2,3.3,1.1,2.2,null]", "", 1L, 1.1D, "", 1L, 1.1D}, + new Object[]{"[\"d\",null,\"b\"]", "[1,2,3]", "[null,2.2,null]", "[\"d\",null,\"b\",\"foo\"]", "[\"foo\",\"d\",null,\"b\"]", "[1,2,3,1]", "[2,1,2,3]", "[null,2.2,null,1.1]", "[2.2,null,2.2,null]", "[\"b\",\"c\",\"d\",null,\"b\"]", "[1,2,3,4,1,2,3]", "[1.1,3.3,null,2.2,null]", "d", 1L, 0.0D, "d", 1L, 0.0D}, + new Object[]{"[null,\"b\"]", null, "[999.0,null,5.5]", "[null,\"b\",\"foo\"]", "[\"foo\",null,\"b\"]", null, null, "[999.0,null,5.5,1.1]", "[2.2,999.0,null,5.5]", "[\"a\",\"b\",\"c\",null,\"b\"]", null, "[3.3,4.4,5.5,999.0,null,5.5]", "", null, 999.0D, "", null, 999.0D} + ); + } else { + expectedResults = ImmutableList.of( + new Object[]{null, "[]", null, null, null, "[1]", "[2]", null, null, null, "[1,2,3]", null, null, null, null, null, null, null}, + new Object[]{"[\"a\",\"b\"]", "[2,3]", "[null]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[2,3,1]", "[2,2,3]", "[null,1.1]", "[2.2,null]", null, null, null, "a", 2L, null, "a", 2L, null}, + new Object[]{"[\"b\",\"b\"]", "[1]", null, "[\"b\",\"b\",\"foo\"]", "[\"foo\",\"b\",\"b\"]", "[1,1]", "[2,1]", null, null, "[\"d\",\"e\",\"b\",\"b\"]", "[1,4,1]", null, "b", 1L, null, "b", 1L, null}, + new Object[]{null, "[null,2,9]", "[999.0,5.5,null]", null, null, "[null,2,9,1]", "[2,null,2,9]", "[999.0,5.5,null,1.1]", "[2.2,999.0,5.5,null]", null, null, null, null, null, 999.0D, null, null, 999.0D}, + new Object[]{"[\"a\",\"b\"]", "[1,null,3]", "[1.1,2.2,null]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[1,null,3,1]", "[2,1,null,3]", "[1.1,2.2,null,1.1]", "[2.2,1.1,2.2,null]", "[\"a\",\"b\",\"a\",\"b\"]", "[1,2,3,1,null,3]", "[1.1,2.2,3.3,1.1,2.2,null]", "a", 1L, 1.1D, "a", 1L, 1.1D}, + new Object[]{"[\"d\",null,\"b\"]", "[1,2,3]", "[null,2.2,null]", "[\"d\",null,\"b\",\"foo\"]", "[\"foo\",\"d\",null,\"b\"]", "[1,2,3,1]", "[2,1,2,3]", "[null,2.2,null,1.1]", "[2.2,null,2.2,null]", "[\"b\",\"c\",\"d\",null,\"b\"]", "[1,2,3,4,1,2,3]", "[1.1,3.3,null,2.2,null]", "d", 1L, null, "d", 1L, null}, + new Object[]{"[null,\"b\"]", null, "[999.0,null,5.5]", "[null,\"b\",\"foo\"]", "[\"foo\",null,\"b\"]", null, null, "[999.0,null,5.5,1.1]", "[2.2,999.0,null,5.5]", "[\"a\",\"b\",\"c\",null,\"b\"]", null, "[3.3,4.4,5.5,999.0,null,5.5]", null, null, 999.0D, null, null, 999.0D}, + new Object[]{null, null, "[]", null, null, null, null, "[1.1]", "[2.2]", null, null, "[1.1,2.2,3.3]", null, null, null, null, null, null}, + new Object[]{"[\"a\",\"b\"]", "[2,3]", "[null,1.1]", "[\"a\",\"b\",\"foo\"]", "[\"foo\",\"a\",\"b\"]", "[2,3,1]", "[2,2,3]", "[null,1.1,1.1]", "[2.2,null,1.1]", null, null, null, "a", 2L, null, "a", 2L, null}, + new Object[]{"[\"b\",\"b\"]", "[null]", null, "[\"b\",\"b\",\"foo\"]", "[\"foo\",\"b\",\"b\"]", "[null,1]", "[2,null]", null, null, "[\"d\",\"e\",\"b\",\"b\"]", "[1,4,null]", null, "b", null, null, "b", null, null}, + new Object[]{"[null]", "[null,2,9]", "[999.0,5.5,null]", "[null,\"foo\"]", "[\"foo\",null]", "[null,2,9,1]", "[2,null,2,9]", "[999.0,5.5,null,1.1]", "[2.2,999.0,5.5,null]", "[\"a\",\"b\",null]", null, null, null, null, 999.0D, null, null, 999.0D}, + new Object[]{"[]", "[1,null,3]", "[1.1,2.2,null]", "[\"foo\"]", "[\"foo\"]", "[1,null,3,1]", "[2,1,null,3]", "[1.1,2.2,null,1.1]", "[2.2,1.1,2.2,null]", "[\"a\",\"b\"]", "[1,2,3,1,null,3]", "[1.1,2.2,3.3,1.1,2.2,null]", null, 1L, 1.1D, null, 1L, 1.1D}, + new Object[]{"[\"d\",null,\"b\"]", "[1,2,3]", "[null,2.2,null]", "[\"d\",null,\"b\",\"foo\"]", "[\"foo\",\"d\",null,\"b\"]", "[1,2,3,1]", "[2,1,2,3]", "[null,2.2,null,1.1]", "[2.2,null,2.2,null]", "[\"b\",\"c\",\"d\",null,\"b\"]", "[1,2,3,4,1,2,3]", "[1.1,3.3,null,2.2,null]", "d", 1L, null, "d", 1L, null}, + new Object[]{"[null,\"b\"]", null, "[999.0,null,5.5]", "[null,\"b\",\"foo\"]", "[\"foo\",null,\"b\"]", null, null, "[999.0,null,5.5,1.1]", "[2.2,999.0,null,5.5]", "[\"a\",\"b\",\"c\",null,\"b\"]", null, "[3.3,4.4,5.5,999.0,null,5.5]", null, null, 999.0D, null, null, 999.0D} + ); + } + testQuery( + "SELECT" + + " arrayStringNulls," + + " arrayLongNulls," + + " arrayDoubleNulls," + + " ARRAY_APPEND(arrayStringNulls, 'foo')," + + " ARRAY_PREPEND('foo', arrayStringNulls)," + + " ARRAY_APPEND(arrayLongNulls, 1)," + + " ARRAY_PREPEND(2, arrayLongNulls)," + + " ARRAY_APPEND(arrayDoubleNulls, 1.1)," + + " ARRAY_PREPEND(2.2, arrayDoubleNulls)," + + " ARRAY_CONCAT(arrayString,arrayStringNulls)," + + " ARRAY_CONCAT(arrayLong,arrayLongNulls)," + + " ARRAY_CONCAT(arrayDouble,arrayDoubleNulls)," + + " ARRAY_OFFSET(arrayStringNulls,0)," + + " ARRAY_OFFSET(arrayLongNulls,0)," + + " ARRAY_OFFSET(arrayDoubleNulls,0)," + + " ARRAY_ORDINAL(arrayStringNulls,1)," + + " ARRAY_ORDINAL(arrayLongNulls,1)," + + " ARRAY_ORDINAL(arrayDoubleNulls,1)" + + " FROM druid.arrays", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + // these report as strings even though they are not, someday this will not be so + expressionVirtualColumn("v0", "array_append(\"arrayStringNulls\",'foo')", ColumnType.STRING_ARRAY), + expressionVirtualColumn("v1", "array_prepend('foo',\"arrayStringNulls\")", ColumnType.STRING_ARRAY), + expressionVirtualColumn("v10", "array_offset(\"arrayLongNulls\",0)", ColumnType.LONG), + expressionVirtualColumn("v11", "array_offset(\"arrayDoubleNulls\",0)", ColumnType.DOUBLE), + expressionVirtualColumn("v12", "array_ordinal(\"arrayStringNulls\",1)", ColumnType.STRING), + expressionVirtualColumn("v13", "array_ordinal(\"arrayLongNulls\",1)", ColumnType.LONG), + expressionVirtualColumn("v14", "array_ordinal(\"arrayDoubleNulls\",1)", ColumnType.DOUBLE), + expressionVirtualColumn("v2", "array_append(\"arrayLongNulls\",1)", ColumnType.LONG_ARRAY), + expressionVirtualColumn("v3", "array_prepend(2,\"arrayLongNulls\")", ColumnType.LONG_ARRAY), + expressionVirtualColumn("v4", "array_append(\"arrayDoubleNulls\",1.1)", ColumnType.DOUBLE_ARRAY), + expressionVirtualColumn("v5", "array_prepend(2.2,\"arrayDoubleNulls\")", ColumnType.DOUBLE_ARRAY), + expressionVirtualColumn("v6", "array_concat(\"arrayString\",\"arrayStringNulls\")", ColumnType.STRING_ARRAY), + expressionVirtualColumn("v7", "array_concat(\"arrayLong\",\"arrayLongNulls\")", ColumnType.LONG_ARRAY), + expressionVirtualColumn("v8", "array_concat(\"arrayDouble\",\"arrayDoubleNulls\")", ColumnType.DOUBLE_ARRAY), + expressionVirtualColumn("v9", "array_offset(\"arrayStringNulls\",0)", ColumnType.STRING) + ) + .columns( + "arrayDoubleNulls", + "arrayLongNulls", + "arrayStringNulls", + "v0", + "v1", + "v10", + "v11", + "v12", + "v13", + "v14", + "v2", + "v3", + "v4", + "v5", + "v6", + "v7", + "v8", + "v9" + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + expectedResults, + RowSignature.builder() + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .add("EXPR$3", ColumnType.STRING_ARRAY) + .add("EXPR$4", ColumnType.STRING_ARRAY) + .add("EXPR$5", ColumnType.LONG_ARRAY) + .add("EXPR$6", ColumnType.LONG_ARRAY) + .add("EXPR$7", ColumnType.DOUBLE_ARRAY) + .add("EXPR$8", ColumnType.DOUBLE_ARRAY) + .add("EXPR$9", ColumnType.STRING_ARRAY) + .add("EXPR$10", ColumnType.LONG_ARRAY) + .add("EXPR$11", ColumnType.DOUBLE_ARRAY) + .add("EXPR$12", ColumnType.STRING) + .add("EXPR$13", ColumnType.LONG) + .add("EXPR$14", ColumnType.DOUBLE) + .add("EXPR$15", ColumnType.STRING) + .add("EXPR$16", ColumnType.LONG) + .add("EXPR$17", ColumnType.DOUBLE) + .build() + ); + } + @Test public void testSomeArrayFunctionsWithScanQueryNoStringify() { @@ -521,6 +851,84 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayOverlapFilterStringArrayColumn() + { + testQuery( + "SELECT arrayStringNulls FROM druid.arrays WHERE ARRAY_OVERLAP(arrayStringNulls, ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))")) + .columns("arrayStringNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"b\",\"b\"]"}, + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"d\",null,\"b\"]"}, + new Object[]{"[null,\"b\"]"} + ) + ); + } + + @Test + public void testArrayOverlapFilterLongArrayColumn() + { + testQuery( + "SELECT arrayLongNulls FROM druid.arrays WHERE ARRAY_OVERLAP(arrayLongNulls, ARRAY[1, 2]) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))")) + .columns("arrayLongNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[2,3]"}, + new Object[]{"[1]"}, + new Object[]{"[null,2,9]"}, + new Object[]{"[1,null,3]"}, + new Object[]{"[1,2,3]"} + ) + ); + } + + @Test + public void testArrayOverlapFilterDoubleArrayColumn() + { + testQuery( + "SELECT arrayDoubleNulls FROM druid.arrays WHERE ARRAY_OVERLAP(arrayDoubleNulls, ARRAY[1.1, 2.2]) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))")) + .columns("arrayDoubleNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1.1,2.2,null]"}, + new Object[]{"[null,2.2,null]"}, + new Object[]{"[null,1.1]"}, + new Object[]{"[1.1,2.2,null]"}, + new Object[]{"[null,2.2,null]"} + ) + ); + } + @Test public void testArrayOverlapFilterWithExtractionFn() { @@ -570,6 +978,83 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayOverlapFilterArrayStringColumns() + { + testQuery( + "SELECT arrayStringNulls, arrayString FROM druid.arrays WHERE ARRAY_OVERLAP(arrayStringNulls, arrayString) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayStringNulls\",\"arrayString\")")) + .columns("arrayString", "arrayStringNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]", "[\"a\",\"b\"]"}, + new Object[]{"[\"d\",null,\"b\"]", "[\"b\",\"c\"]"}, + new Object[]{"[null,\"b\"]", "[\"a\",\"b\",\"c\"]"}, + new Object[]{"[\"d\",null,\"b\"]", "[\"b\",\"c\"]"}, + new Object[]{"[null,\"b\"]", "[\"a\",\"b\",\"c\"]"} + ) + ); + } + + @Test + public void testArrayOverlapFilterArrayLongColumns() + { + testQuery( + "SELECT arrayLongNulls, arrayLong FROM druid.arrays WHERE ARRAY_OVERLAP(arrayLongNulls, arrayLong) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayLongNulls\",\"arrayLong\")")) + .columns("arrayLong", "arrayLongNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1]", "[1,4]"}, + new Object[]{"[1,null,3]", "[1,2,3]"}, + new Object[]{"[1,2,3]", "[1,2,3,4]"}, + new Object[]{"[1,null,3]", "[1,2,3]"}, + new Object[]{"[1,2,3]", "[1,2,3,4]"} + ) + ); + } + + @Test + public void testArrayOverlapFilterArrayDoubleColumns() + { + testQuery( + "SELECT arrayDoubleNulls, arrayDouble FROM druid.arrays WHERE ARRAY_OVERLAP(arrayDoubleNulls, arrayDouble) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",\"arrayDouble\")")) + .columns("arrayDouble", "arrayDoubleNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1.1,2.2,null]", "[1.1,2.2,3.3]"}, + new Object[]{"[999.0,null,5.5]", "[3.3,4.4,5.5]"}, + new Object[]{"[1.1,2.2,null]", "[1.1,2.2,3.3]"}, + new Object[]{"[999.0,null,5.5]", "[3.3,4.4,5.5]"} + ) + ); + } + @Test public void testArrayContainsFilter() { @@ -597,6 +1082,83 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayContainsFilterArrayStringColumn() + { + testQuery( + "SELECT arrayStringNulls FROM druid.arrays WHERE ARRAY_CONTAINS(arrayStringNulls, ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayStringNulls\",array('a','b'))") + ) + .columns("arrayStringNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"a\",\"b\"]"} + ) + ); + } + + @Test + public void testArrayContainsFilterArrayLongColumn() + { + testQuery( + "SELECT arrayLongNulls FROM druid.arrays WHERE ARRAY_CONTAINS(arrayLongNulls, ARRAY[1, null]) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayLongNulls\",array(1,null))") + ) + .columns("arrayLongNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1,null,3]"}, + new Object[]{"[1,null,3]"} + ) + ); + } + + @Test + public void testArrayContainsFilterArrayDoubleColumn() + { + testQuery( + "SELECT arrayDoubleNulls FROM druid.arrays WHERE ARRAY_CONTAINS(arrayDoubleNulls, ARRAY[1.1, null]) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayDoubleNulls\",array(1.1,null))") + ) + .columns("arrayDoubleNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1.1,2.2,null]"}, + new Object[]{"[null,1.1]"}, + new Object[]{"[1.1,2.2,null]"} + ) + ); + } + @Test public void testArrayContainsFilterWithExtractionFn() { @@ -671,6 +1233,79 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayContainsFilterArrayStringColumns() + { + testQuery( + "SELECT arrayStringNulls, arrayString FROM druid.arrays WHERE ARRAY_CONTAINS(arrayStringNulls, arrayString) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayStringNulls\",\"arrayString\")") + ) + .columns("arrayString", "arrayStringNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]", "[\"a\",\"b\"]"} + ) + ); + } + + @Test + public void testArrayContainsFilterArrayLongColumns() + { + testQuery( + "SELECT arrayLong, arrayLongNulls FROM druid.arrays WHERE ARRAY_CONTAINS(arrayLong, arrayLongNulls) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayLong\",\"arrayLongNulls\")") + ) + .columns("arrayLong", "arrayLongNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[1,2,3]", "[]"}, + new Object[]{"[1,4]", "[1]"}, + new Object[]{"[1,2,3,4]", "[1,2,3]"}, + new Object[]{"[1,2,3,4]", "[1,2,3]"} + ) + ); + } + + @Test + public void testArrayContainsFilterArrayDoubleColumns() + { + testQuery( + "SELECT arrayDoubleNulls, arrayDouble FROM druid.arrays WHERE ARRAY_CONTAINS(arrayDoubleNulls, arrayDouble) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayDoubleNulls\",\"arrayDouble\")") + ) + .columns("arrayDouble", "arrayDoubleNulls") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of() + ); + } + @Test public void testArraySlice() { @@ -699,6 +1334,46 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArraySliceArrayColumns() + { + testQuery( + "SELECT ARRAY_SLICE(arrayString, 1), ARRAY_SLICE(arrayLong, 2), ARRAY_SLICE(arrayDoubleNulls, 1) FROM druid.arrays", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY, + ImmutableList.of( + new Druids.ScanQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn("v0", "array_slice(\"arrayString\",1)", ColumnType.STRING_ARRAY), + expressionVirtualColumn("v1", "array_slice(\"arrayLong\",2)", ColumnType.LONG_ARRAY), + expressionVirtualColumn("v2", "array_slice(\"arrayDoubleNulls\",1)", ColumnType.DOUBLE_ARRAY) + ) + .columns("v0", "v1", "v2") + .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + ImmutableList.of( + new Object[]{null, Collections.singletonList(3L), null}, + new Object[]{null, null, Collections.emptyList()}, + new Object[]{ImmutableList.of("e"), Collections.emptyList(), null}, + new Object[]{ImmutableList.of("b"), null, Arrays.asList(5.5D, null)}, + new Object[]{ImmutableList.of("b"), Collections.singletonList(3L), Arrays.asList(2.2D, null)}, + new Object[]{ImmutableList.of("c"), Arrays.asList(3L, 4L), Arrays.asList(2.2D, null)}, + new Object[]{ImmutableList.of("b", "c"), Collections.emptyList(), Arrays.asList(null, 5.5D)}, + new Object[]{null, Collections.singletonList(3L), null}, + new Object[]{null, null, Collections.singletonList(1.1D)}, + new Object[]{ImmutableList.of("e"), Collections.emptyList(), null}, + new Object[]{ImmutableList.of("b"), null, Arrays.asList(5.5D, null)}, + new Object[]{ImmutableList.of("b"), Collections.singletonList(3L), Arrays.asList(2.2D, null)}, + new Object[]{ImmutableList.of("c"), Arrays.asList(3L, 4L), Arrays.asList(2.2D, null)}, + new Object[]{ImmutableList.of("b", "c"), Collections.emptyList(), Arrays.asList(null, 5.5D)} + ) + ); + } + @Test public void testArrayLength() { @@ -742,6 +1417,64 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayLengthArrayColumn() + { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + + testQuery( + "SELECT arrayStringNulls, ARRAY_LENGTH(arrayStringNulls), SUM(cnt) FROM druid.arrays GROUP BY 1, 2 ORDER BY 2 DESC", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns(expressionVirtualColumn("v0", "array_length(\"arrayStringNulls\")", ColumnType.LONG)) + .setDimensions( + dimensions( + new DefaultDimensionSpec("arrayStringNulls", "d0", ColumnType.STRING_ARRAY), + new DefaultDimensionSpec("v0", "d1", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "d1", + OrderByColumnSpec.Direction.DESCENDING, + StringComparators.NUMERIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of( + new Object[]{"[\"d\",null,\"b\"]", 3, 2L}, + new Object[]{"[null,\"b\"]", 2, 2L}, + new Object[]{"[\"a\",\"b\"]", 2, 3L}, + new Object[]{"[\"b\",\"b\"]", 2, 2L}, + new Object[]{"[null]", 1, 1L}, + new Object[]{"[]", 0, 1L}, + new Object[]{null, null, 3L} + ) + : ImmutableList.of( + new Object[]{"[\"d\",null,\"b\"]", 3, 2L}, + new Object[]{"[null,\"b\"]", 2, 2L}, + new Object[]{"[\"a\",\"b\"]", 2, 3L}, + new Object[]{"[\"b\",\"b\"]", 2, 2L}, + new Object[]{"[null]", 1, 1L}, + new Object[]{null, 0, 3L}, + new Object[]{"[]", 0, 1L} + ) + ); + } + @Test public void testArrayAppend() { @@ -1064,6 +1797,53 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayGroupAsLongArrayColumn() + { + // Cannot vectorize as we donot have support in native query subsytem for grouping on arrays + cannotVectorize(); + testQuery( + "SELECT arrayLongNulls, SUM(cnt) FROM druid.arrays GROUP BY 1 ORDER BY 2 DESC", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("arrayLongNulls", "d0", ColumnType.LONG_ARRAY) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + OrderByColumnSpec.Direction.DESCENDING, + StringComparators.NUMERIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() + ), + ImmutableList.of( + new Object[]{null, 3L}, + new Object[]{Arrays.asList(null, 2L, 9L), 2L}, + new Object[]{Arrays.asList(1L, null, 3L), 2L}, + new Object[]{Arrays.asList(1L, 2L, 3L), 2L}, + new Object[]{Arrays.asList(2L, 3L), 2L}, + new Object[]{Collections.emptyList(), 1L}, + new Object[]{Collections.singletonList(null), 1L}, + new Object[]{Collections.singletonList(1L), 1L} + ) + ); + } + @Test public void testArrayGroupAsDoubleArray() @@ -1114,6 +1894,53 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayGroupAsDoubleArrayColumn() + { + // Cannot vectorize as we donot have support in native query subsytem for grouping on arrays + cannotVectorize(); + testQuery( + "SELECT arrayDoubleNulls, SUM(cnt) FROM druid.arrays GROUP BY 1 ORDER BY 2 DESC", + QUERY_CONTEXT_NO_STRINGIFY_ARRAY, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_ARRAYS) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("arrayDoubleNulls", "d0", ColumnType.DOUBLE_ARRAY) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + OrderByColumnSpec.Direction.DESCENDING, + StringComparators.NUMERIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() + ), + ImmutableList.of( + new Object[]{null, 3L}, + new Object[]{Arrays.asList(null, 2.2D, null), 2L}, + new Object[]{Arrays.asList(1.1D, 2.2D, null), 2L}, + new Object[]{Arrays.asList(999.0D, null, 5.5D), 2L}, + new Object[]{Arrays.asList(999.0D, 5.5D, null), 2L}, + new Object[]{Collections.emptyList(), 1L}, + new Object[]{Collections.singletonList(null), 1L}, + new Object[]{Arrays.asList(null, 1.1D), 1L} + ) + ); + } + @Test public void testArrayGroupAsFloatArray() { @@ -1943,6 +2770,177 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + + @Test + public void testArrayAggArrayColumns() + { + msqIncompatible(); + // nested array party + cannotVectorize(); + if (NullHandling.replaceWithDefault()) { + // default value mode plans to selector filters for equality, which do not support array filtering + return; + } + testQuery( + "SELECT ARRAY_AGG(arrayLongNulls), ARRAY_AGG(DISTINCT arrayDouble), ARRAY_AGG(DISTINCT arrayStringNulls) FILTER(WHERE arrayLong = ARRAY[2,3]) FROM arrays WHERE arrayDoubleNulls is not null", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .filters(notNull("arrayDoubleNulls")) + .aggregators( + aggregators( + new ExpressionLambdaAggregatorFactory( + "a0", + ImmutableSet.of("arrayLongNulls"), + "__acc", + "ARRAY>[]", + "ARRAY>[]", + true, + true, + false, + "array_append(\"__acc\", \"arrayLongNulls\")", + "array_concat(\"__acc\", \"a0\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + new ExpressionLambdaAggregatorFactory( + "a1", + ImmutableSet.of("arrayDouble"), + "__acc", + "ARRAY>[]", + "ARRAY>[]", + true, + true, + false, + "array_set_add(\"__acc\", \"arrayDouble\")", + "array_set_add_all(\"__acc\", \"a1\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + new FilteredAggregatorFactory( + new ExpressionLambdaAggregatorFactory( + "a2", + ImmutableSet.of("arrayStringNulls"), + "__acc", + "ARRAY>[]", + "ARRAY>[]", + true, + true, + false, + "array_set_add(\"__acc\", \"arrayStringNulls\")", + "array_set_add_all(\"__acc\", \"a2\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + equality("arrayLong", ImmutableList.of(2, 3), ColumnType.LONG_ARRAY) + ) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + "[[2,3],[null,2,9],[1,null,3],[1,2,3],null,null,[2,3],[null,2,9],[1,null,3],[1,2,3],null]", + "[null,[1.1,2.2,3.3],[1.1,3.3],[3.3,4.4,5.5]]", + "[[null,\"b\"]]" + } + ) + ); + } + + @Test + public void testArrayConcatAggArrayColumns() + { + cannotVectorize(); + if (NullHandling.replaceWithDefault()) { + // default value mode plans to selector filters for equality, which do not support array filtering + return; + } + testQuery( + "SELECT ARRAY_CONCAT_AGG(arrayLongNulls), ARRAY_CONCAT_AGG(DISTINCT arrayDouble), ARRAY_CONCAT_AGG(DISTINCT arrayStringNulls) FILTER(WHERE arrayLong = ARRAY[2,3]) FROM arrays WHERE arrayDoubleNulls is not null", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(DATA_SOURCE_ARRAYS) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .filters(notNull("arrayDoubleNulls")) + .aggregators( + aggregators( + new ExpressionLambdaAggregatorFactory( + "a0", + ImmutableSet.of("arrayLongNulls"), + "__acc", + "ARRAY[]", + "ARRAY[]", + true, + false, + false, + "array_concat(\"__acc\", \"arrayLongNulls\")", + "array_concat(\"__acc\", \"a0\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + new ExpressionLambdaAggregatorFactory( + "a1", + ImmutableSet.of("arrayDouble"), + "__acc", + "ARRAY[]", + "ARRAY[]", + true, + false, + false, + "array_set_add_all(\"__acc\", \"arrayDouble\")", + "array_set_add_all(\"__acc\", \"a1\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + new FilteredAggregatorFactory( + new ExpressionLambdaAggregatorFactory( + "a2", + ImmutableSet.of("arrayStringNulls"), + "__acc", + "ARRAY[]", + "ARRAY[]", + true, + false, + false, + "array_set_add_all(\"__acc\", \"arrayStringNulls\")", + "array_set_add_all(\"__acc\", \"a2\")", + null, + null, + ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, + TestExprMacroTable.INSTANCE + ), + equality("arrayLong", ImmutableList.of(2, 3), ColumnType.LONG_ARRAY) + ) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{ + "[2,3,null,2,9,1,null,3,1,2,3,2,3,null,2,9,1,null,3,1,2,3]", + "[1.1,2.2,3.3,4.4,5.5]", + "[null,\"b\"]" + } + ) + ); + } + @Test public void testArrayAggToString() { @@ -2601,30 +3599,6 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest } - public static void assertResultsDeepEquals(String sql, List expected, List results) - { - for (int row = 0; row < results.size(); row++) { - for (int col = 0; col < results.get(row).length; col++) { - final String rowString = StringUtils.format("result #%d: %s", row + 1, sql); - assertDeepEquals(rowString + " - column: " + col + ":", expected.get(row)[col], results.get(row)[col]); - } - } - } - - public static void assertDeepEquals(String path, Object expected, Object actual) - { - if (expected instanceof List && actual instanceof List) { - List expectedList = (List) expected; - List actualList = (List) actual; - Assert.assertEquals(path + " arrays length mismatch", expectedList.size(), actualList.size()); - for (int i = 0; i < expectedList.size(); i++) { - assertDeepEquals(path + "[" + i + "]", expectedList.get(i), actualList.get(i)); - } - } else { - Assert.assertEquals(path, expected, actual); - } - } - @Test public void testUnnestInline() { @@ -2743,6 +3717,312 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestArrayColumnsString() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayString) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayString\"", ColumnType.STRING_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"d"}, + new Object[]{"e"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{"e"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"c"} + ) + ); + } + + @Test + public void testUnnestArrayColumnsStringNulls() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayStringNulls) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayStringNulls\"", ColumnType.STRING_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"d"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"d"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestArrayColumnsLong() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayLong) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLong\"", ColumnType.LONG_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{4L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{4L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{4L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{4L}, + new Object[]{2L}, + new Object[]{3L} + ) + ); + } + + @Test + public void testUnnestArrayColumnsLongNulls() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayLongNulls) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{2L}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{null}, + new Object[]{2L}, + new Object[]{9L}, + new Object[]{1L}, + new Object[]{null}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{2L}, + new Object[]{3L}, + new Object[]{null}, + new Object[]{null}, + new Object[]{2L}, + new Object[]{9L}, + new Object[]{1L}, + new Object[]{null}, + new Object[]{3L}, + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} + ) + ); + } + + @Test + public void testUnnestArrayColumnsDouble() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayDouble) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayDouble\"", ColumnType.DOUBLE_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{4.0D}, + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{1.1D}, + new Object[]{3.3D}, + new Object[]{3.3D}, + new Object[]{4.4D}, + new Object[]{5.5D}, + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{4.0D}, + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{3.3D}, + new Object[]{1.1D}, + new Object[]{3.3D}, + new Object[]{3.3D}, + new Object[]{4.4D}, + new Object[]{5.5D} + ) + ); + } + + @Test + public void testUnnestArrayColumnsDoubleNulls() + { + cannotVectorize(); + testQuery( + "SELECT a FROM druid.arrays, UNNEST(arrayDoubleNulls) as unnested (a)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayDoubleNulls\"", ColumnType.DOUBLE_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{null}, + new Object[]{999.0D}, + new Object[]{5.5D}, + new Object[]{null}, + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{null}, + new Object[]{null}, + new Object[]{2.2D}, + new Object[]{null}, + new Object[]{999.0D}, + new Object[]{null}, + new Object[]{5.5D}, + new Object[]{null}, + new Object[]{1.1D}, + new Object[]{999.0D}, + new Object[]{5.5D}, + new Object[]{null}, + new Object[]{1.1D}, + new Object[]{2.2D}, + new Object[]{null}, + new Object[]{null}, + new Object[]{2.2D}, + new Object[]{null}, + new Object[]{999.0D}, + new Object[]{null}, + new Object[]{5.5D} + ) + ); + } + @Test public void testUnnestTwice() { @@ -2820,6 +4100,89 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestTwiceArrayColumns() + { + cannotVectorize(); + testQuery( + "SELECT arrayStringNulls, arrayLongNulls, usn, uln" + + " FROM\n" + + " druid.arrays,\n" + + " UNNEST(arrayStringNulls) as t2 (usn),\n" + + " UNNEST(arrayLongNulls) as t3 (uln)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn( + "j0.unnest", + "\"arrayStringNulls\"", + ColumnType.STRING_ARRAY + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"arrayLongNulls\"", + ColumnType.LONG_ARRAY + ), + null + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("_j0.unnest", "arrayLongNulls", "arrayStringNulls", "j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "a", 2L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "a", 3L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "b", 2L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "b", 3L}, + new Object[]{Arrays.asList("b", "b"), Collections.singletonList(1L), "b", 1L}, + new Object[]{Arrays.asList("b", "b"), Collections.singletonList(1L), "b", 1L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "a", 1L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "a", null}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "a", 3L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "b", 1L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "b", null}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(1L, null, 3L), "b", 3L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 3L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 3L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 3L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "a", 2L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "a", 3L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "b", 2L}, + new Object[]{Arrays.asList("a", "b"), Arrays.asList(2L, 3L), "b", 3L}, + new Object[]{Arrays.asList("b", "b"), Collections.singletonList(null), "b", null}, + new Object[]{Arrays.asList("b", "b"), Collections.singletonList(null), "b", null}, + new Object[]{Collections.singletonList(null), Arrays.asList(null, 2L, 9L), NullHandling.defaultStringValue(), null}, + new Object[]{Collections.singletonList(null), Arrays.asList(null, 2L, 9L), NullHandling.defaultStringValue(), 2L}, + new Object[]{Collections.singletonList(null), Arrays.asList(null, 2L, 9L), NullHandling.defaultStringValue(), 9L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "d", 3L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), NullHandling.defaultStringValue(), 3L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 1L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 2L}, + new Object[]{Arrays.asList("d", null, "b"), Arrays.asList(1L, 2L, 3L), "b", 3L} + ) + ); + } + @Test public void testUnnestTwiceWithFiltersAndExpressions() { @@ -2885,6 +4248,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test public void testUnnestThriceWithFiltersOnDimAndUnnestCol() { @@ -3052,6 +4416,74 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumns() + { + cannotVectorize(); + String sql = " SELECT arrayString, uln, udn, usn FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM arrays, UNNEST(arrayLongNulls) as ut(uln))" + + " ,UNNEST(arrayDoubleNulls) as ut(udn) \n" + + " ), UNNEST(arrayStringNulls) as ut(usn) " + + " WHERE arrayString = ARRAY['a','b'] AND uln = 1 AND udn = 2.2 AND usn = 'a'"; + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn( + "j0.unnest", + "\"arrayLongNulls\"", + ColumnType.LONG_ARRAY + ), + null + ), + and( + NullHandling.sqlCompatible() + ? equality("arrayString", ImmutableList.of("a", "b"), ColumnType.STRING_ARRAY) + : expressionFilter("(\"arrayString\" == array('a','b'))"), + equality("j0.unnest", 1, ColumnType.LONG) + ) + ), + expressionVirtualColumn( + "_j0.unnest", + "\"arrayDoubleNulls\"", + ColumnType.DOUBLE_ARRAY + ), + equality("_j0.unnest", 2.2, ColumnType.DOUBLE) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"arrayStringNulls\"", + ColumnType.STRING_ARRAY + ), + equality("__j0.unnest", "a", ColumnType.STRING) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns( + expressionVirtualColumn("v0", "array('a','b')", ColumnType.STRING_ARRAY), + expressionVirtualColumn("v1", "1", ColumnType.LONG) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "v0", "v1")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, + ImmutableList.of( + new Object[]{ImmutableList.of("a", "b"), 1L, 2.2D, "a"} + ) + ); + } + @Test public void testUnnestThriceWithFiltersOnDimAndUnnestColumnsORCombinations() { @@ -3132,6 +4564,81 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ) ); } + + @Test + public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumnsOrFilters() + { + cannotVectorize(); + String sql = " SELECT arrayString, uln, udn, usn FROM \n" + + " ( SELECT * FROM \n" + + " ( SELECT * FROM arrays, UNNEST(arrayLongNulls) as ut(uln))" + + " ,UNNEST(arrayDoubleNulls) as ut(udn) \n" + + " ), UNNEST(arrayStringNulls) as ut(usn) " + + " WHERE arrayString = ARRAY['a','b'] AND (uln = 1 OR udn = 2.2) AND usn = 'a'"; + List> expectedQuerySc = ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + FilteredDataSource.create( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn( + "j0.unnest", + "\"arrayLongNulls\"", + ColumnType.LONG_ARRAY + ), + null + ), + NullHandling.sqlCompatible() + ? equality("arrayString", ImmutableList.of("a", "b"), ColumnType.STRING_ARRAY) + : expressionFilter("(\"arrayString\" == array('a','b'))") + ), + expressionVirtualColumn( + "_j0.unnest", + "\"arrayDoubleNulls\"", + ColumnType.DOUBLE_ARRAY + ), + null + ), + or( + equality("j0.unnest", 1, ColumnType.LONG), + equality("_j0.unnest", 2.2, ColumnType.DOUBLE) + ) + ), + expressionVirtualColumn( + "__j0.unnest", + "\"arrayStringNulls\"", + ColumnType.STRING_ARRAY + ), + equality("__j0.unnest", "a", ColumnType.STRING) + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns( + expressionVirtualColumn("v0", "array('a','b')", ColumnType.STRING_ARRAY) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("__j0.unnest", "_j0.unnest", "j0.unnest", "v0")) + .build() + ); + testQuery( + sql, + QUERY_CONTEXT_UNNEST, + expectedQuerySc, + ImmutableList.of( + new Object[]{ImmutableList.of("a", "b"), 1L, 1.1D, "a"}, + new Object[]{ImmutableList.of("a", "b"), 1L, 2.2D, "a"}, + new Object[]{ImmutableList.of("a", "b"), 1L, null, "a"}, + new Object[]{ImmutableList.of("a", "b"), null, 2.2D, "a"}, + new Object[]{ImmutableList.of("a", "b"), 3L, 2.2D, "a"} + ) + ); + } + @Test public void testUnnestWithGroupBy() { @@ -3177,6 +4684,36 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestWithGroupByArrayColumn() + { + cannotVectorize(); + testQuery( + "SELECT usn FROM druid.arrays, UNNEST(arrayStringNulls) as u (usn) GROUP BY usn ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayStringNulls\"", ColumnType.STRING_ARRAY), + null + )) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.STRING)) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_UNNEST) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"d"} + ) + ); + } + @Test public void testUnnestWithGroupByOrderBy() { @@ -4752,6 +6289,32 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestWithSumOnUnnestedArrayColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "select sum(c) col from druid.arrays, unnest(arrayDoubleNulls) as u(c)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayDoubleNulls\"", ColumnType.DOUBLE_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .context(QUERY_CONTEXT_UNNEST) + .aggregators(aggregators(new DoubleSumAggregatorFactory("a0", "j0.unnest"))) + .build() + ), + ImmutableList.of( + new Object[]{4030.0999999999995} + ) + ); + } + @Test public void testUnnestWithGroupByHavingWithWhereOnAggCol() { @@ -4813,6 +6376,79 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestWithGroupByWithWhereOnUnnestArrayCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT uln, COUNT(*) FROM druid.arrays, UNNEST(arrayLongNulls) AS unnested(uln) WHERE uln IN (1, 2, 3) GROUP BY uln", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + NullHandling.sqlCompatible() + ? or( + equality("j0.unnest", 1L, ColumnType.LONG), + equality("j0.unnest", 2L, ColumnType.LONG), + equality("j0.unnest", 3L, ColumnType.LONG) + ) + : in("j0.unnest", ImmutableList.of("1", "2", "3"), null) + )) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG)) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new CountAggregatorFactory("a0")) + .setContext(QUERY_CONTEXT_UNNEST) + .build() + ), + ImmutableList.of( + new Object[]{1L, 5L}, + new Object[]{2L, 6L}, + new Object[]{3L, 6L} + ) + ); + } + + @Test + public void testUnnestWithGroupByHavingWithWhereOnUnnestArrayCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT uln, COUNT(*) FROM druid.arrays, UNNEST(arrayLongNulls) AS unnested(uln) WHERE uln IN (1, 2, 3) GROUP BY uln HAVING uln=1", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + NullHandling.sqlCompatible() + ? or( + equality("j0.unnest", 1L, ColumnType.LONG), + equality("j0.unnest", 2L, ColumnType.LONG), + equality("j0.unnest", 3L, ColumnType.LONG) + ) + : in("j0.unnest", ImmutableList.of("1", "2", "3"), null) + )) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG)) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new CountAggregatorFactory("a0")) + .setDimFilter(equality("j0.unnest", 1L, ColumnType.LONG)) + .setContext(QUERY_CONTEXT_UNNEST) + .build() + ), + ImmutableList.of( + new Object[]{1L, 5L} + ) + ); + } + @Test public void testUnnestVirtualWithColumnsAndNullIf() { @@ -4893,6 +6529,45 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestWithTimeFilterOnlyArrayColumn() + { + testQuery( + "select c from arrays, unnest(arrayStringNulls) as u(c)" + + " where __time >= TIMESTAMP '2023-01-02 00:00:00' and __time <= TIMESTAMP '2023-01-03 00:10:00'", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + FilteredDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + range("__time", ColumnType.LONG, 1672617600000L, 1672704600000L, false, false) + ), + expressionVirtualColumn("j0.unnest", "\"arrayStringNulls\"", ColumnType.STRING_ARRAY), + null + )) + .intervals(querySegmentSpec(Intervals.of("2023-01-02T00:00:00.000Z/2023-01-03T00:10:00.001Z"))) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"d"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{"b"} + ) + ); + } + @Test public void testUnnestWithTimeFilterAndAnotherFilter() { @@ -5108,6 +6783,56 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testUnnestWithTimeFilterInsideSubqueryArrayColumns() + { + testQuery( + "select uln from (select * from arrays, UNNEST(arrayLongNulls) as u(uln)" + + " where __time >= TIMESTAMP '2023-01-02 00:00:00' and __time <= TIMESTAMP '2023-01-03 00:10:00' LIMIT 2) \n" + + " where ARRAY_CONTAINS(arrayLongNulls, ARRAY[2])", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + new QueryDataSource( + newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + FilteredDataSource.create( + new TableDataSource(DATA_SOURCE_ARRAYS), + range("__time", ColumnType.LONG, 1672617600000L, 1672704600000L, false, false) + ), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + ) + ) + .intervals(querySegmentSpec(Intervals.of( + "2023-01-02T00:00:00.000Z/2023-01-03T00:10:00.001Z"))) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .columns("arrayLongNulls", "j0.unnest") + .limit(2) + .context(QUERY_CONTEXT_UNNEST) + .build() + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + expressionFilter("array_contains(\"arrayLongNulls\",array(2))") + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{2L}, + new Object[]{3L} + ) + ); + } + @Test public void testUnnestWithFilterAndUnnestNestedBackToBack() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java index c6f05697026..13288f0caa0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java @@ -172,7 +172,7 @@ public class TestDataBuilder ); - private static final IncrementalIndexSchema INDEX_SCHEMA = new IncrementalIndexSchema.Builder() + public static final IncrementalIndexSchema INDEX_SCHEMA = new IncrementalIndexSchema.Builder() .withMetrics( new CountAggregatorFactory("cnt"), new FloatSumAggregatorFactory("m1", "m1"),