Fix capabilities reported by UnnestStorageAdapter. (#16551)

UnnestStorageAdapter and its cursors did not return capabilities correctly
for the output column. This patch fixes two problems:

1) UnnestStorageAdapter returned the capabilities of the unnest virtual
   column prior to unnesting. It should return the post-unnest capabilities.

2) UnnestColumnValueSelectorCursor passed through isDictionaryEncoded from
   the unnest virtual column. This is incorrect, because the dimension selector
   created by this class never has a dictionary. This is the cause of #16543.
This commit is contained in:
Gian Merlino 2024-06-05 15:19:42 -07:00 committed by GitHub
parent 6d7d2ffa63
commit 1040a29bc5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 94 additions and 38 deletions

View File

@ -24,7 +24,6 @@ import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
@ -202,24 +201,11 @@ public class UnnestColumnValueSelectorCursor implements Cursor
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
if (!outputName.equals(column)) {
return baseColumnSelectorFactory.getColumnCapabilities(column);
if (outputName.equals(column)) {
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
}
final ColumnCapabilities capabilities = unnestColumn.capabilities(
baseColumnSelectorFactory,
unnestColumn.getOutputName()
);
if (capabilities == null) {
return null;
} else if (capabilities.isArray()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
} else if (capabilities.hasMultipleValues().isTrue()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
} else {
return capabilities;
}
return baseColumnSelectorFactory.getColumnCapabilities(column);
}
};
}

View File

@ -26,7 +26,6 @@ import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.data.IndexedInts;
import org.joda.time.DateTime;
@ -240,25 +239,11 @@ public class UnnestDimensionCursor implements Cursor
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
if (!outputName.equals(column)) {
return baseColumnSelectorFactory.getColumnCapabilities(column);
if (outputName.equals(column)) {
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
}
// This currently returns the same type as of the column to be unnested
// This is fine for STRING types
// But going forward if the dimension to be unnested is of type ARRAY,
// this should strip down to the base type of the array
final ColumnCapabilities capabilities = unnestColumn.capabilities(
baseColumnSelectorFactory,
unnestColumn.getOutputName()
);
if (capabilities.isArray()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
}
if (capabilities.hasMultipleValues().isTrue()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
}
return capabilities;
return baseColumnSelectorFactory.getColumnCapabilities(column);
}
};
}

View File

@ -35,6 +35,8 @@ import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.RangeFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.TypeSignature;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
@ -229,7 +231,7 @@ public class UnnestStorageAdapter implements StorageAdapter
public ColumnCapabilities getColumnCapabilities(String column)
{
if (outputColumnName.equals(column)) {
return unnestColumn.capabilities(baseAdapter, column);
return computeOutputColumnCapabilities(baseAdapter, unnestColumn);
}
return baseAdapter.getColumnCapabilities(column);
@ -559,6 +561,34 @@ public class UnnestStorageAdapter implements StorageAdapter
}
}
/**
 * Derives the capabilities that {@link #outputColumnName} exposes once unnesting has been applied.
 *
 * @param baseColumnInspector inspector handed to the unnest virtual column to resolve its own capabilities
 * @param unnestColumn        the virtual column being unnested
 * @return the post-unnest capabilities, or null when the virtual column reports no capabilities
 */
@Nullable
public static ColumnCapabilities computeOutputColumnCapabilities(
    final ColumnInspector baseColumnInspector,
    final VirtualColumn unnestColumn
)
{
  final ColumnCapabilities inputCapabilities =
      unnestColumn.capabilities(baseColumnInspector, unnestColumn.getOutputName());

  // Nothing is known about the input, so nothing can be said about the output either.
  if (inputCapabilities == null) {
    return null;
  }

  // An array input yields its element type after unnesting; every other input keeps its own type.
  final TypeSignature<ValueType> unnestedType;
  if (inputCapabilities.isArray()) {
    unnestedType = inputCapabilities.getElementType();
  } else {
    unnestedType = inputCapabilities.toColumnType();
  }

  // Built from defaults rather than copied from the input: the output is always single-valued, and the
  // dictionary-encoded flag is taken from useDimensionCursor instead of being passed through.
  return ColumnCapabilitiesImpl.createDefault()
      .setType(unnestedType)
      .setHasMultipleValues(false)
      .setDictionaryEncoded(useDimensionCursor(inputCapabilities));
}
/**
* Requirement for {@link #rewriteFilterOnUnnestColumnIfPossible}: filter must support rewrites and also must map
* over multi-value strings. (Rather than treat them as arrays.) There isn't a method on the Filter interface that

View File

@ -194,7 +194,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
}
@Test
public void test_group_of_unnest_adapters_column_capabilities()
public void test_unnest_adapter_column_capabilities()
{
String colName = "multi-string1";
List<String> columnsInTable = Arrays.asList(
@ -220,7 +220,14 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
Assert.assertEquals(capabilities.getType(), valueTypes.get(i));
}
assertColumnReadsIdentifier(adapter.getUnnestColumn(), colName);
Assert.assertEquals(
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
ColumnCapabilities.Capable.TRUE // passed through from dict-encoded input
);
Assert.assertEquals(
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).hasMultipleValues(),
ColumnCapabilities.Capable.FALSE
);
}
@Test

View File

@ -4092,6 +4092,54 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
/**
 * Runs a SQL query that unnests the "arrayString" column and applies a string concatenation
 * ({@code a || '.txt'}) to each unnested element. The expected native plan is a scan query over an
 * {@link UnnestDataSource}, with the concatenation expressed as an expression virtual column ("v0")
 * reading the unnest output column ("j0.unnest").
 */
@Test
public void testUnnestArrayColumnsStringThenFunction()
{
// Regression test for https://github.com/apache/druid/issues/16543.
cannotVectorize();
testQuery(
"SELECT a || '.txt' FROM druid.arrays, UNNEST(arrayString) as unnested (a)",
QUERY_CONTEXT_UNNEST,
// Expected native query: scan over the unnest datasource, selecting only the concat virtual column.
ImmutableList.of(
newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.ARRAYS_DATASOURCE),
expressionVirtualColumn("j0.unnest", "\"arrayString\"", ColumnType.STRING_ARRAY),
null
))
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(expressionVirtualColumn("v0", "concat(\"j0.unnest\",'.txt')", ColumnType.STRING))
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("v0"))
.build()
),
// Expected results: one row per unnested array element, each suffixed with ".txt".
ImmutableList.of(
new Object[]{"d.txt"},
new Object[]{"e.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"d.txt"},
new Object[]{"e.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"}
)
);
}
@Test
public void testUnnestArrayColumnsStringNulls()
{