Refactoring and bug fixes on top of unnest. The allowList now is not passed … (#13922)

* Refactoring and bug fixes on top of unnest. The filter is now passed inside the unnest cursors. Added tests for scenarios such as:
1. filter on unnested column which involves a left filter rewrite
2. filter on unnested virtual column which pushes the filter to the right only and involves no rewrite
3. not filters
4. SQL functions applied on top of unnested column
5. null present in first row of the column to be unnested
This commit is contained in:
somu-imply 2023-03-14 16:05:56 -07:00 committed by GitHub
parent 4493275d88
commit a7ba361666
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 302 additions and 538 deletions

View File

@ -412,15 +412,13 @@ The `unnest` datasource uses the following syntax:
"type": "expression",
"expression": "\"column_reference\""
},
"outputName": "unnested_target_column",
"allowList": []
},
"outputName": "unnested_target_column"
}
```
* `dataSource.type`: Set this to `unnest`.
* `dataSource.base`: Defines the datasource you want to unnest.
* `dataSource.base.type`: The type of datasource you want to unnest, such as a table.
* `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. To replace the source column with the unnested column, specify the source column's name as the output name; to write the unnested values to a new column, specify a different name. Outputting to a new column can help you verify that you get the results you expect, but it isn't required.
* `dataSource.allowList`: Optional. The subset of values you want to unnest.
To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md).

View File

@ -29,8 +29,6 @@ import org.apache.druid.segment.UnnestSegmentReference;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.utils.JvmUtils;
import javax.annotation.Nullable;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@ -39,7 +37,6 @@ import java.util.function.Function;
/**
* The data source for representing an unnest operation.
*
* An unnest data source has the following:
* a base data source which is to be unnested
* the column name of the MVD which will be unnested
@ -50,27 +47,23 @@ public class UnnestDataSource implements DataSource
{
private final DataSource base;
private final VirtualColumn virtualColumn;
private final LinkedHashSet<String> allowList;
private UnnestDataSource(
DataSource dataSource,
VirtualColumn virtualColumn,
LinkedHashSet<String> allowList
VirtualColumn virtualColumn
)
{
this.base = dataSource;
this.virtualColumn = virtualColumn;
this.allowList = allowList;
}
@JsonCreator
public static UnnestDataSource create(
@JsonProperty("base") DataSource base,
@JsonProperty("virtualColumn") VirtualColumn virtualColumn,
@Nullable @JsonProperty("allowList") LinkedHashSet<String> allowList
@JsonProperty("virtualColumn") VirtualColumn virtualColumn
)
{
return new UnnestDataSource(base, virtualColumn, allowList);
return new UnnestDataSource(base, virtualColumn);
}
@JsonProperty("base")
@ -85,12 +78,6 @@ public class UnnestDataSource implements DataSource
return virtualColumn;
}
@JsonProperty("allowList")
public LinkedHashSet<String> getAllowList()
{
return allowList;
}
@Override
public Set<String> getTableNames()
{
@ -109,7 +96,7 @@ public class UnnestDataSource implements DataSource
if (children.size() != 1) {
throw new IAE("Expected [1] child, got [%d]", children.size());
}
return new UnnestDataSource(children.get(0), virtualColumn, allowList);
return new UnnestDataSource(children.get(0), virtualColumn);
}
@Override
@ -146,17 +133,15 @@ public class UnnestDataSource implements DataSource
baseSegment ->
new UnnestSegmentReference(
segmentMapFn.apply(baseSegment),
virtualColumn,
allowList
virtualColumn
)
);
}
@Override
public DataSource withUpdatedDataSource(DataSource newSource)
{
return new UnnestDataSource(newSource, virtualColumn, allowList);
return new UnnestDataSource(newSource, virtualColumn);
}
@Override
@ -203,7 +188,6 @@ public class UnnestDataSource implements DataSource
return "UnnestDataSource{" +
"base=" + base +
", column='" + virtualColumn + '\'' +
", allowList=" + allowList +
'}';
}

View File

@ -30,7 +30,6 @@ import org.joda.time.DateTime;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
/**
@ -50,9 +49,6 @@ import java.util.List;
* unnestCursor.advance() -> 'e'
* <p>
* <p>
* The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to
* the next available match.
* <p>
* The index reference points to the position, within the current row's list, of the value that the unnest cursor
* is accessing through currentVal. The index ranges from 0 to the size of that list, which is held in
* unnestListForCurrentRow.
* <p>
@ -65,18 +61,17 @@ public class UnnestColumnValueSelectorCursor implements Cursor
private final ColumnValueSelector columnValueSelector;
private final VirtualColumn unnestColumn;
private final String outputName;
private final LinkedHashSet<String> allowSet;
private int index;
private Object currentVal;
private List<Object> unnestListForCurrentRow;
private boolean needInitialization;
public UnnestColumnValueSelectorCursor(
Cursor cursor,
ColumnSelectorFactory baseColumnSelectorFactory,
VirtualColumn unnestColumn,
String outputColumnName,
LinkedHashSet<String> allowSet
String outputColumnName
)
{
this.baseCursor = cursor;
@ -89,7 +84,6 @@ public class UnnestColumnValueSelectorCursor implements Cursor
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
this.allowSet = allowSet;
}
@Override
@ -194,11 +188,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor
public Object getObject()
{
if (!unnestListForCurrentRow.isEmpty()) {
if (allowSet == null || allowSet.isEmpty()) {
return unnestListForCurrentRow.get(index);
} else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) {
return unnestListForCurrentRow.get(index);
}
return unnestListForCurrentRow.get(index);
}
return null;
}
@ -253,9 +243,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor
@Override
public void advanceUninterruptibly()
{
do {
advanceAndUpdate();
} while (matchAndProceed());
advanceAndUpdate();
}
@Override
@ -304,19 +292,10 @@ public class UnnestColumnValueSelectorCursor implements Cursor
/**
* This initializes the unnest cursor and creates data structures
* to start iterating over the values to be unnested.
* This would also create a bitset for dictonary encoded columns to
* check for matching values specified in allowedList of UnnestDataSource.
*/
private void initialize()
{
getNextRow();
if (allowSet != null) {
if (!allowSet.isEmpty()) {
if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) {
advance();
}
}
}
needInitialization = false;
}
@ -338,22 +317,4 @@ public class UnnestColumnValueSelectorCursor implements Cursor
index++;
}
}
/**
* This advances the unnest cursor in cases where an allowList is specified
* and the current value at the unnest cursor is not in the allowList.
* The cursor in such cases is moved till the next match is found.
*
* @return a boolean to indicate whether to stay or move cursor
*/
private boolean matchAndProceed()
{
boolean matchStatus;
if (allowSet == null || allowSet.isEmpty()) {
matchStatus = true;
} else {
matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index));
}
return !baseCursor.isDone() && !matchStatus;
}
}

View File

@ -31,8 +31,6 @@ import org.apache.druid.segment.data.IndexedInts;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
import java.util.BitSet;
import java.util.LinkedHashSet;
/**
* The cursor to help unnest MVDs with dictionary encoding.
@ -58,15 +56,6 @@ import java.util.LinkedHashSet;
* <p>
* Total 5 advance calls above
* <p>
* The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to
* the next available match. The hashSet is converted into a bitset (during initialization) for efficiency.
* If allowSet is ['c', 'd'] then the advance moves over to the next available match
* <p>
* advance() -> 2 -> 'c'
* advance() -> 3 -> 'd' (advances base cursor first)
* advance() -> 2 -> 'c'
* <p>
* Total 3 advance calls in this case
* <p>
* The index reference points to the position, within the current row, of the value that the unnest cursor is accessing.
* The indexedInts for the current row are held in the indexedIntsForCurrentRow object.
@ -79,8 +68,6 @@ public class UnnestDimensionCursor implements Cursor
private final DimensionSelector dimSelector;
private final VirtualColumn unnestColumn;
private final String outputName;
private final LinkedHashSet<String> allowSet;
private final BitSet allowedBitSet;
private final ColumnSelectorFactory baseColumnSelectorFactory;
private int index;
@Nullable
@ -92,8 +79,7 @@ public class UnnestDimensionCursor implements Cursor
Cursor cursor,
ColumnSelectorFactory baseColumnSelectorFactory,
VirtualColumn unnestColumn,
String outputColumnName,
LinkedHashSet<String> allowSet
String outputColumnName
)
{
this.baseCursor = cursor;
@ -106,8 +92,6 @@ public class UnnestDimensionCursor implements Cursor
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
this.allowSet = allowSet;
this.allowedBitSet = new BitSet();
}
@Override
@ -158,6 +142,9 @@ public class UnnestDimensionCursor implements Cursor
/**
 * Reports whether the value at the current unnest position equals the id this matcher looks for.
 *
 * @return {@code false} when the current row has nothing to unnest (the row is null or empty);
 *         otherwise whether the id at {@code index} in the current row equals {@code idForLookup}
 */
@Override
public boolean matches()
{
  // indexedIntsForCurrentRow is only assigned once the selector yields a non-null object, so it may
  // still be null here; treat that like an empty row instead of failing with a NullPointerException.
  if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() <= 0) {
    return false;
  }
  return idForLookup == indexedIntsForCurrentRow.get(index);
}
@ -188,14 +175,7 @@ public class UnnestDimensionCursor implements Cursor
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) {
return null;
}
if (allowedBitSet.isEmpty()) {
if (allowSet == null || allowSet.isEmpty()) {
return lookupName(indexedIntsForCurrentRow.get(index));
}
} else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
return lookupName(indexedIntsForCurrentRow.get(index));
}
return null;
return lookupName(indexedIntsForCurrentRow.get(index));
}
@Override
@ -207,9 +187,6 @@ public class UnnestDimensionCursor implements Cursor
@Override
public int getValueCardinality()
{
if (!allowedBitSet.isEmpty()) {
return allowedBitSet.cardinality();
}
return dimSelector.getValueCardinality();
}
@ -290,9 +267,7 @@ public class UnnestDimensionCursor implements Cursor
@Override
public void advanceUninterruptibly()
{
do {
advanceAndUpdate();
} while (matchAndProceed());
advanceAndUpdate();
}
@Override
@ -330,23 +305,13 @@ public class UnnestDimensionCursor implements Cursor
@Nullable
private void initialize()
{
IdLookup idLookup = dimSelector.idLookup();
index = 0;
this.indexIntsForRow = new SingleIndexInts();
if (allowSet != null && !allowSet.isEmpty() && idLookup != null) {
for (String s : allowSet) {
if (idLookup.lookupId(s) >= 0) {
allowedBitSet.set(idLookup.lookupId(s));
}
}
}
if (dimSelector.getObject() != null) {
this.indexedIntsForCurrentRow = dimSelector.getRow();
}
if (!allowedBitSet.isEmpty()) {
if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
advance();
}
}
needInitialization = false;
}
@ -362,6 +327,9 @@ public class UnnestDimensionCursor implements Cursor
index = 0;
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
if (!baseCursor.isDone()) {
indexedIntsForCurrentRow = dimSelector.getRow();
}
}
} else {
if (index >= indexedIntsForCurrentRow.size() - 1) {
@ -378,23 +346,6 @@ public class UnnestDimensionCursor implements Cursor
}
}
/**
* This advances the unnest cursor in cases where an allowList is specified
* and the current value at the unnest cursor is not in the allowList.
* The cursor in such cases is moved till the next match is found.
*
* @return a boolean to indicate whether to stay or move cursor
*/
private boolean matchAndProceed()
{
boolean matchStatus;
if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) {
matchStatus = true;
} else {
matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index));
}
return !baseCursor.isDone() && !matchStatus;
}
// Helper class to help in returning
// getRow from the dimensionSelector

View File

@ -28,7 +28,6 @@ import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Optional;
/**
@ -41,17 +40,15 @@ public class UnnestSegmentReference implements SegmentReference
private final SegmentReference baseSegment;
private final VirtualColumn unnestColumn;
private final LinkedHashSet<String> allowSet;
public UnnestSegmentReference(
SegmentReference baseSegment,
VirtualColumn unnestColumn,
LinkedHashSet<String> allowList
VirtualColumn unnestColumn
)
{
this.baseSegment = baseSegment;
this.unnestColumn = unnestColumn;
this.allowSet = allowList;
}
@Override
@ -103,8 +100,7 @@ public class UnnestSegmentReference implements SegmentReference
{
return new UnnestStorageAdapter(
baseSegment.asStorageAdapter(),
unnestColumn,
allowSet
unnestColumn
);
}

View File

@ -62,18 +62,15 @@ public class UnnestStorageAdapter implements StorageAdapter
private final StorageAdapter baseAdapter;
private final VirtualColumn unnestColumn;
private final String outputColumnName;
private final LinkedHashSet<String> allowSet;
public UnnestStorageAdapter(
final StorageAdapter baseAdapter,
final VirtualColumn unnestColumn,
final LinkedHashSet<String> allowSet
final VirtualColumn unnestColumn
)
{
this.baseAdapter = baseAdapter;
this.unnestColumn = unnestColumn;
this.outputColumnName = unnestColumn.getOutputName();
this.allowSet = allowSet;
}
@Override
@ -87,7 +84,7 @@ public class UnnestStorageAdapter implements StorageAdapter
)
{
final String inputColumn = getUnnestInputIfDirectAccess();
final Pair<Filter, Filter> filterPair = computeBaseAndPostCorrelateFilters(
final Pair<Filter, Filter> filterPair = computeBaseAndPostUnnestFilters(
filter,
virtualColumns,
inputColumn,
@ -120,16 +117,14 @@ public class UnnestStorageAdapter implements StorageAdapter
retVal,
retVal.getColumnSelectorFactory(),
unnestColumn,
outputColumnName,
allowSet
outputColumnName
);
} else {
retVal = new UnnestColumnValueSelectorCursor(
retVal,
retVal.getColumnSelectorFactory(),
unnestColumn,
outputColumnName,
allowSet
outputColumnName
);
}
} else {
@ -137,8 +132,7 @@ public class UnnestStorageAdapter implements StorageAdapter
retVal,
retVal.getColumnSelectorFactory(),
unnestColumn,
outputColumnName,
allowSet
outputColumnName
);
}
return PostJoinCursor.wrap(
@ -260,9 +254,9 @@ public class UnnestStorageAdapter implements StorageAdapter
* @param inputColumn input column to unnest if it's a direct access; otherwise null
* @param inputColumnCapabilites input column capabilities if known; otherwise null
*
* @return pair of pre- and post-correlate filters
* @return pair of pre- and post-unnest filters
*/
private Pair<Filter, Filter> computeBaseAndPostCorrelateFilters(
private Pair<Filter, Filter> computeBaseAndPostUnnestFilters(
@Nullable final Filter queryFilter,
final VirtualColumns queryVirtualColumns,
@Nullable final String inputColumn,
@ -282,7 +276,7 @@ public class UnnestStorageAdapter implements StorageAdapter
final Set<String> requiredColumns = filter.getRequiredColumns();
// Run filter post-correlate if it refers to any virtual columns.
// Run filter post-unnest if it refers to any virtual columns.
if (queryVirtualColumns.getVirtualColumns().length > 0) {
for (String column : requiredColumns) {
if (queryVirtualColumns.exists(column)) {
@ -293,13 +287,15 @@ public class UnnestStorageAdapter implements StorageAdapter
}
if (requiredColumns.contains(outputColumnName)) {
// Try to move filter pre-correlate if possible.
// Try to rewrite the filter on the unnest output column so it can also be applied pre-unnest.
final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites);
if (newFilter != null) {
// Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting
// any rows that do not match this filter at all.
preFilters.add(newFilter);
} else {
postFilters.add(filter);
}
// Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values.
postFilters.add(filter);
} else {
preFilters.add(filter);
}
@ -308,11 +304,6 @@ public class UnnestStorageAdapter implements StorageAdapter
final FilterSplitter filterSplitter = new FilterSplitter();
if (allowSet != null && !allowSet.isEmpty()) {
// Filter on input column if possible (it may be faster); otherwise use output column.
filterSplitter.add(new InDimFilter(inputColumn != null ? inputColumn : outputColumnName, allowSet));
}
if (queryFilter instanceof AndFilter) {
for (Filter filter : ((AndFilter) queryFilter).getFilters()) {
filterSplitter.add(filter);

View File

@ -111,8 +111,7 @@ public class QueryRunnerTestHelper
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
)
);
public static final Granularity DAY_GRAN = Granularities.DAY;

View File

@ -239,8 +239,7 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
)
))
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(new DefaultDimensionSpec("quality", "alias"))
@ -453,8 +452,7 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
)
))
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
@ -566,8 +564,7 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest
"mv_to_array(placementish)",
ColumnType.STRING_ARRAY,
TestExprMacroTable.INSTANCE
),
null
)
);
GroupByQuery query = makeQueryBuilder()
@ -655,8 +652,7 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest
"array(\"market\",\"quality\")",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
),
null
)
);
GroupByQuery query = makeQueryBuilder()

View File

@ -22,7 +22,6 @@ package org.apache.druid.query.scan;
import com.google.common.collect.Lists;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DefaultGenericQueryMetricsFactory;
import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryPlus;
@ -46,9 +45,7 @@ import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@ -96,27 +93,6 @@ public class UnnestScanQueryRunnerTest extends InitializedNullHandlingTest
.legacy(legacy);
}
private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet()
{
List<String> allowList = Arrays.asList("a", "b", "c");
LinkedHashSet allowSet = new LinkedHashSet(allowList);
return Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
allowSet
))
.columns(Collections.emptyList())
.eternityInterval()
.limit(3)
.legacy(legacy);
}
@Test
public void testScanOnUnnest()
{
@ -188,8 +164,7 @@ public class UnnestScanQueryRunnerTest extends InitializedNullHandlingTest
"mv_to_array(placementish)",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
),
null
)
))
.columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
.eternityInterval()
@ -258,8 +233,7 @@ public class UnnestScanQueryRunnerTest extends InitializedNullHandlingTest
"array(\"market\",\"quality\")",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
),
null
)
))
.columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
.eternityInterval()
@ -461,65 +435,6 @@ public class UnnestScanQueryRunnerTest extends InitializedNullHandlingTest
ScanQueryRunnerTest.verify(ascendingExpectedResults, results);
}
@Test
public void testUnnestRunnerNonNullAllowSet()
{
ScanQuery query = newTestUnnestQueryWithAllowSet()
.intervals(I_0112_0114)
.columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
.limit(3)
.build();
final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
FACTORY,
new IncrementalIndexSegment(
index,
QueryRunnerTestHelper.SEGMENT_ID
),
query,
"rtIndexvc"
);
Iterable<ScanResultValue> results = queryRunner.run(QueryPlus.wrap(query)).toList();
String[] columnNames;
if (legacy) {
columnNames = new String[]{
getTimestampName() + ":TIME",
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST
};
} else {
columnNames = new String[]{
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST
};
}
String[] values;
if (legacy) {
values = new String[]{
"2011-01-12T00:00:00.000Z\ta",
"2011-01-12T00:00:00.000Z\tb",
"2011-01-13T00:00:00.000Z\ta"
};
} else {
values = new String[]{
"a",
"b",
"a"
};
}
final List<List<Map<String, Object>>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values);
List<ScanResultValue> expectedResults = toExpected(
events,
legacy
? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
: Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST),
0,
3
);
ScanQueryRunnerTest.verify(expectedResults, results);
}
private String getTimestampName()
{

View File

@ -258,8 +258,7 @@ public class UnnestTopNQueryRunnerTest extends InitializedNullHandlingTest
"mv_to_array(\"placementish\")",
ColumnType.STRING_ARRAY,
TestExprMacroTable.INSTANCE
),
null
)
))
.granularity(QueryRunnerTestHelper.ALL_GRAN)
.dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
@ -341,8 +340,7 @@ public class UnnestTopNQueryRunnerTest extends InitializedNullHandlingTest
"array(\"market\",\"quality\")",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
),
null
)
))
.granularity(QueryRunnerTestHelper.ALL_GRAN)
.dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)

View File

@ -35,14 +35,11 @@ import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest
{
private static String OUTPUT_NAME = "unnested-column";
private static LinkedHashSet<String> IGNORE_SET = null;
private static LinkedHashSet<String> IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f"));
@BeforeClass
public static void setUpClass()
@ -73,8 +70,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -107,8 +103,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -139,8 +134,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -176,8 +170,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -210,8 +203,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -240,8 +232,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -275,8 +266,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -309,15 +299,13 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor(
childCursor,
childCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()),
"tmp-out",
IGNORE_SET
"tmp-out"
);
ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory()
.makeColumnValueSelector("tmp-out");
@ -351,8 +339,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -389,8 +376,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -408,44 +394,6 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
Assert.assertEquals(k, 10);
}
@Test
public void test_list_unnest_cursors_user_supplied_list_with_ignore_set()
{
List<Object> inputList = Arrays.asList(
Arrays.asList("a", "b", "c"),
Arrays.asList("e", "f", "g", "h", "i"),
Collections.singletonList("j")
);
List<String> expectedResults = Arrays.asList("b", "f");
//Create base cursor
ListCursor listCursor = new ListCursor(inputList);
//Create unnest cursor
UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor(
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET1
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
if (valueSelectorVal == null) {
Assert.assertEquals(null, expectedResults.get(k));
} else {
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
}
k++;
unnestCursor.advance();
}
Assert.assertEquals(k, 2);
}
@Test
public void test_list_unnest_cursors_user_supplied_list_double()
{
@ -465,8 +413,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -499,8 +446,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -533,8 +479,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -570,8 +515,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@ -606,8 +550,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
// should return a column value selector for this case
BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory()
@ -649,8 +592,7 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling
listCursor,
listCursor.getColumnSelectorFactory(),
new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()),
OUTPUT_NAME,
IGNORE_SET
OUTPUT_NAME
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);

View File

@ -46,7 +46,6 @@ import org.junit.BeforeClass;
import org.junit.Test;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
@ -56,13 +55,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER;
private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER;
private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1;
private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER2;
private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER3;
private static List<StorageAdapter> ADAPTERS;
private static String COLUMNNAME = "multi-string1";
private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1";
private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again";
private static LinkedHashSet<String> IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5"));
@BeforeClass
public static void setup()
@ -86,29 +82,17 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX);
UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter(
INCREMENTAL_INDEX_STORAGE_ADAPTER,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
null
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil())
);
UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter(
INCREMENTAL_INDEX_STORAGE_ADAPTER,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
IGNORE_SET
);
UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter(
UNNEST_STORAGE_ADAPTER,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
null
);
UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter(
UNNEST_STORAGE_ADAPTER1,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()),
IGNORE_SET
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil())
);
ADAPTERS = ImmutableList.of(
UNNEST_STORAGE_ADAPTER,
UNNEST_STORAGE_ADAPTER1,
UNNEST_STORAGE_ADAPTER2,
UNNEST_STORAGE_ADAPTER3
UNNEST_STORAGE_ADAPTER1
);
}
@ -217,9 +201,9 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
@Test
public void test_two_levels_of_unnest_adapters()
{
Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors(
Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors(
null,
UNNEST_STORAGE_ADAPTER2.getInterval(),
UNNEST_STORAGE_ADAPTER1.getInterval(),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
@ -256,148 +240,6 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
});
}
/**
 * Walks the cursor produced by {@code UNNEST_STORAGE_ADAPTER1} over its full interval and checks the
 * number of unnested rows together with the value cardinality reported by the dimension selector for
 * {@code OUTPUT_COLUMN_NAME}.
 */
@Test
public void test_unnest_adapters_with_allowList()
{
  Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors(
      null,
      UNNEST_STORAGE_ADAPTER1.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  cursorSequence.accumulate(null, (accumulated, cursor) -> {
    ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
    DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
    ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME);

    int count = 0;
    while (!cursor.isDone()) {
      // Read through both selector kinds on every row. No assertion is made on the values themselves;
      // the reads only verify that neither selector throws while the cursor is positioned on a row.
      dimSelector.getObject();
      valueSelector.getObject();
      cursor.advance();
      count++;
    }
    /*
    each row has 8 distinct entries.
    allowlist has 3 entries also the value cardinality
    unnest will have 3 distinct entries
     */
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages correct.
    Assert.assertEquals(3, count);
    Assert.assertEquals(3, dimSelector.getValueCardinality());
    return null;
  });
}
/**
 * Checks {@code UNNEST_STORAGE_ADAPTER3}: first the adapter-level metadata (unnest column, dictionary
 * encoding capability, min/max values) and then the rows produced by its cursor for
 * {@code OUTPUT_COLUMN_NAME1}.
 */
@Test
public void test_two_levels_of_unnest_adapters_with_allowList()
{
  Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors(
      null,
      UNNEST_STORAGE_ADAPTER3.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3;
  // COLUMNNAME is the same "multi-string1" column the adapters are built on in setup().
  assertColumnReadsIdentifier(adapter.getUnnestColumn(), COLUMNNAME);
  Assert.assertEquals(
      ColumnCapabilities.Capable.TRUE,
      adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded()
  );
  Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME));
  Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME));

  cursorSequence.accumulate(null, (accumulated, cursor) -> {
    ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
    DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1));
    ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1);

    int count = 0;
    while (!cursor.isDone()) {
      // Read through both selector kinds on every row. No assertion is made on the values themselves;
      // the reads only verify that neither selector throws while the cursor is positioned on a row.
      dimSelector.getObject();
      valueSelector.getObject();
      cursor.advance();
      count++;
    }
    /*
    each row has 8 distinct entries.
    allowlist has 3 entries also the value cardinality
    unnest will have 3 distinct entries
    unnest of that unnest will have 3*3 = 9 entries
     */
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages correct.
    Assert.assertEquals(9, count);
    Assert.assertEquals(3, dimSelector.getValueCardinality());
    return null;
  });
}
/**
 * Exercises the dimension selector returned by {@code UNNEST_STORAGE_ADAPTER1} row by row: the id lookup,
 * {@code getRow()}, {@code makeValueMatcher} and the reported value cardinality, after first checking the
 * adapter-level metadata for {@code OUTPUT_COLUMN_NAME}.
 */
@Test
public void test_unnest_adapters_methods_with_allowList()
{
  Sequence<Cursor> cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors(
      null,
      UNNEST_STORAGE_ADAPTER1.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1;
  // COLUMNNAME is the same "multi-string1" column the adapters are built on in setup().
  assertColumnReadsIdentifier(adapter.getUnnestColumn(), COLUMNNAME);
  Assert.assertEquals(
      ColumnCapabilities.Capable.TRUE,
      adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded()
  );
  Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME));
  Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME));

  cursorSequence.accumulate(null, (accumulated, cursor) -> {
    ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
    DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
    IdLookup idLookup = dimSelector.idLookup();
    Assert.assertFalse(dimSelector.isNull());

    // Dictionary ids expected for the successive unnested rows.
    final int[] expectedIds = new int[]{1, 3, 5};
    int count = 0;
    while (!cursor.isDone()) {
      // Fail with a readable message instead of an ArrayIndexOutOfBoundsException if extra rows show up.
      Assert.assertTrue("cursor produced more rows than expected", count < expectedIds.length);

      Object dimSelectorVal = dimSelector.getObject();
      Assert.assertEquals(expectedIds[count], idLookup.lookupId((String) dimSelectorVal));
      // after unnest first entry in get row should equal the object
      // and the row size will always be 1
      Assert.assertEquals(expectedIds[count], dimSelector.getRow().get(0));
      Assert.assertEquals(1, dimSelector.getRow().size());
      Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME));
      cursor.advance();
      count++;
    }
    Assert.assertEquals(3, dimSelector.getValueCardinality());
    Assert.assertEquals(3, count);
    return null;
  });
}
private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier)
{
MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class));

View File

@ -172,8 +172,7 @@ public class DruidCorrelateUnnestRel extends DruidRel<DruidCorrelateUnnestRel>
correlateRowSignature.getColumnName(correlateRowSignature.size() - 1),
Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()),
getPlannerContext().getExprMacroTable()
),
null
)
),
correlateRowSignature,
getPlannerContext(),

View File

@ -2697,6 +2697,37 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
/**
 * Counts the rows of an inline {@code UNNEST(ARRAY[1,2,3])}. The expected native plan is a timeseries
 * query with a count aggregator over an inline datasource holding the three literal values, and the
 * expected result is a single row containing 3.
 */
@Test
public void testUnnestInlineWithCount()
{
  skipVectorize();
  cannotVectorize();

  final String sql = "SELECT COUNT(*) FROM (select c from UNNEST(ARRAY[1,2,3]) as unnested(c))";

  // The array literal is expected to show up as three single-column LONG rows named EXPR$0.
  final InlineDataSource inlineArrayRows = InlineDataSource.fromIterable(
      ImmutableList.of(
          new Object[]{1L},
          new Object[]{2L},
          new Object[]{3L}
      ),
      RowSignature.builder().add("EXPR$0", ColumnType.LONG).build()
  );

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(new Object[]{3L});

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(inlineArrayRows)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .context(QUERY_CONTEXT_UNNEST)
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .build()
      ),
      expectedRows
  );
}
@Test
public void testUnnest()
{
@ -2713,8 +2744,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -2768,15 +2798,13 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
"j0.unnest",
"string_to_array(\"dim1\",'\\u005C.')",
ColumnType.STRING_ARRAY
),
null
)
),
expressionVirtualColumn(
"_j0.unnest",
"\"dim3\"",
ColumnType.STRING
),
null
)
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
@ -2835,15 +2863,13 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
"j0.unnest",
"string_to_array(\"dim1\",'\\u005C.')",
ColumnType.STRING_ARRAY
),
null
)
),
expressionVirtualColumn(
"_j0.unnest",
"\"dim3\"",
ColumnType.STRING
),
null
)
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
@ -2898,8 +2924,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
GroupByQuery.builder()
.setDataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.setInterval(querySegmentSpec(Filtration.eternity()))
.setContext(QUERY_CONTEXT_UNNEST)
@ -2943,8 +2968,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
GroupByQuery.builder()
.setDataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.setInterval(querySegmentSpec(Filtration.eternity()))
.setContext(QUERY_CONTEXT_UNNEST)
@ -2999,8 +3023,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
new TopNQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING))
@ -3038,8 +3061,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
GroupByQuery.builder()
.setDataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.setInterval(querySegmentSpec(Filtration.eternity()))
.setContext(QUERY_CONTEXT_UNNEST)
@ -3081,8 +3103,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3116,8 +3137,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3178,8 +3198,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
.context(QUERY_CONTEXT_UNNEST)
.build()
),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3228,8 +3247,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
.context(QUERY_CONTEXT_UNNEST)
.build()
),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3262,8 +3280,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3305,8 +3322,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null))
@ -3342,8 +3358,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY),
null
expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3386,8 +3401,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
"j0.unnest",
"array(\"dim2\",\"dim4\")",
ColumnType.STRING_ARRAY
),
null
)
)
)
.setInterval(querySegmentSpec(Filtration.eternity()))
@ -3455,8 +3469,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
"(\"dim2\" == \"j0.dim2\")",
JoinType.INNER
),
expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING),
null
expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING)
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
@ -3545,4 +3558,183 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
)
);
}
/**
 * Applies the SQL function {@code strlen} to the unnested {@code dim3} values. The expected native plan
 * is a scan over an unnest datasource, with the function evaluated by virtual column {@code v0} on top
 * of {@code j0.unnest}. The last two expected rows depend on {@code useDefault}.
 */
@Test
public void testUnnestWithSQLFunctionOnUnnestedColumn()
{
  skipVectorize();
  cannotVectorize();

  final String sql = "SELECT strlen(d3) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)";

  final UnnestDataSource unnestedDim3 = UnnestDataSource.create(
      new TableDataSource(CalciteTests.DATASOURCE3),
      expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
  );

  final ImmutableList<Object[]> expectedRows;
  if (useDefault) {
    expectedRows = ImmutableList.of(
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{0},
        new Object[]{0},
        new Object[]{0}
    );
  } else {
    expectedRows = ImmutableList.of(
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{1},
        new Object[]{0},
        new Object[]{null},
        new Object[]{null}
    );
  }

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(unnestedDim3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .virtualColumns(expressionVirtualColumn("v0", "strlen(\"j0.unnest\")", ColumnType.LONG))
                .legacy(false)
                .context(QUERY_CONTEXT_UNNEST)
                .columns(ImmutableList.of("v0"))
                .build()
      ),
      expectedRows
  );
}
/**
 * Filters the unnested {@code dim3} values with {@code IN ('a','b')}. The expected native plan is a scan
 * over an unnest datasource carrying an {@link InDimFilter} on {@code j0.unnest}.
 * NOTE(review): going by the test name this is the case where the filter can also be rewritten onto the
 * base (left) column; that rewrite is not visible in the expected native query.
 */
@Test
public void testUnnestWithINFiltersWithLeftRewrite()
{
  skipVectorize();
  cannotVectorize();

  final String sql =
      "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')";

  final UnnestDataSource unnestedDim3 = UnnestDataSource.create(
      new TableDataSource(CalciteTests.DATASOURCE3),
      expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
  );
  final InDimFilter inFilter = new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null);

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{"a"},
      new Object[]{"b"},
      new Object[]{"b"}
  );

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(unnestedDim3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .filters(inFilter)
                .legacy(false)
                .context(QUERY_CONTEXT_UNNEST)
                .columns(ImmutableList.of("j0.unnest"))
                .build()
      ),
      expectedRows
  );
}
/**
 * Filters the values unnested from {@code ARRAY[dim4,dim5]} with {@code IN ('a','b')}. Here the unnested
 * column is the virtual expression {@code array("dim4","dim5")} rather than a single base column; the
 * expected native plan is a scan over the unnest datasource with an {@link InDimFilter} on
 * {@code j0.unnest}.
 */
@Test
public void testUnnestWithINFiltersWithNoLeftRewrite()
{
  skipVectorize();
  cannotVectorize();

  final String sql =
      "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','b')";

  final UnnestDataSource unnestedDim4AndDim5 = UnnestDataSource.create(
      new TableDataSource(CalciteTests.DATASOURCE3),
      expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)
  );
  final InDimFilter inFilter = new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null);

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{"a"},
      new Object[]{"a"},
      new Object[]{"a"},
      new Object[]{"b"},
      new Object[]{"b"},
      new Object[]{"b"}
  );

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(unnestedDim4AndDim5)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .filters(inFilter)
                .legacy(false)
                .context(QUERY_CONTEXT_UNNEST)
                .columns(ImmutableList.of("j0.unnest"))
                .build()
      ),
      expectedRows
  );
}
/**
 * Filters the unnested {@code dim3} values with {@code IN ('foo','bar')}. The expected native plan still
 * carries the {@link InDimFilter} on {@code j0.unnest}, and the expected result set is empty.
 */
@Test
public void testUnnestWithInvalidINFiltersOnUnnestedColumn()
{
  skipVectorize();
  cannotVectorize();

  final String sql =
      "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar')";

  final UnnestDataSource unnestedDim3 = UnnestDataSource.create(
      new TableDataSource(CalciteTests.DATASOURCE3),
      expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
  );
  final InDimFilter inFilter = new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null);

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(unnestedDim3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .filters(inFilter)
                .legacy(false)
                .context(QUERY_CONTEXT_UNNEST)
                .columns(ImmutableList.of("j0.unnest"))
                .build()
      ),
      // No rows are expected to survive the filter.
      ImmutableList.of()
  );
}
/**
 * Filters the unnested {@code dim3} values with {@code d3 != 'd'}. The expected native plan is a scan
 * over an unnest datasource with a negated selector filter on {@code j0.unnest}. The last two expected
 * rows depend on {@code useDefault}.
 */
@Test
public void testUnnestWithNotFiltersOnUnnestedColumn()
{
  skipVectorize();
  cannotVectorize();

  // The trailing space inside the SQL text is part of the original query string and is kept as is.
  final String sql =
      "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='d' ";

  final UnnestDataSource unnestedDim3 = UnnestDataSource.create(
      new TableDataSource(CalciteTests.DATASOURCE3),
      expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING)
  );

  final ImmutableList<Object[]> expectedRows;
  if (useDefault) {
    expectedRows = ImmutableList.of(
        new Object[]{"a"},
        new Object[]{"b"},
        new Object[]{"b"},
        new Object[]{"c"},
        new Object[]{""},
        new Object[]{""},
        new Object[]{""}
    );
  } else {
    expectedRows = ImmutableList.of(
        new Object[]{"a"},
        new Object[]{"b"},
        new Object[]{"b"},
        new Object[]{"c"},
        new Object[]{""},
        new Object[]{null},
        new Object[]{null}
    );
  }

  testQuery(
      sql,
      QUERY_CONTEXT_UNNEST,
      ImmutableList.of(
          Druids.newScanQueryBuilder()
                .dataSource(unnestedDim3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                .filters(not(selector("j0.unnest", "d", null)))
                .legacy(false)
                .context(QUERY_CONTEXT_UNNEST)
                .columns(ImmutableList.of("j0.unnest"))
                .build()
      ),
      expectedRows
  );
}
}