mirror of https://github.com/apache/druid.git
Fix capabilities reported by UnnestStorageAdapter. (#16551)
UnnestStorageAdapter and its cursors did not return capabilities correctly for the output column. This patch fixes two problems: 1) UnnestStorageAdapter returned the capabilities of the unnest virtual column prior to unnesting. It should return the post-unnest capabilities. 2) UnnestColumnValueSelectorCursor passed through isDictionaryEncoded from the unnest virtual column. This is incorrect, because the dimension selector created by this class never has a dictionary. This is the cause of #16543.
This commit is contained in:
parent
6d7d2ffa63
commit
1040a29bc5
|
@ -24,7 +24,6 @@ import org.apache.druid.query.BaseQuery;
|
|||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -202,24 +201,11 @@ public class UnnestColumnValueSelectorCursor implements Cursor
|
|||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities(String column)
|
||||
{
|
||||
if (!outputName.equals(column)) {
|
||||
return baseColumnSelectorFactory.getColumnCapabilities(column);
|
||||
if (outputName.equals(column)) {
|
||||
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
|
||||
}
|
||||
|
||||
final ColumnCapabilities capabilities = unnestColumn.capabilities(
|
||||
baseColumnSelectorFactory,
|
||||
unnestColumn.getOutputName()
|
||||
);
|
||||
|
||||
if (capabilities == null) {
|
||||
return null;
|
||||
} else if (capabilities.isArray()) {
|
||||
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
|
||||
} else if (capabilities.hasMultipleValues().isTrue()) {
|
||||
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
|
||||
} else {
|
||||
return capabilities;
|
||||
}
|
||||
return baseColumnSelectorFactory.getColumnCapabilities(column);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.druid.query.filter.DruidPredicateFactory;
|
|||
import org.apache.druid.query.filter.ValueMatcher;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.data.IndexedInts;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
|
@ -240,25 +239,11 @@ public class UnnestDimensionCursor implements Cursor
|
|||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities(String column)
|
||||
{
|
||||
if (!outputName.equals(column)) {
|
||||
return baseColumnSelectorFactory.getColumnCapabilities(column);
|
||||
if (outputName.equals(column)) {
|
||||
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
|
||||
}
|
||||
// This currently returns the same type as of the column to be unnested
|
||||
// This is fine for STRING types
|
||||
// But going forward if the dimension to be unnested is of type ARRAY,
|
||||
// this should strip down to the base type of the array
|
||||
final ColumnCapabilities capabilities = unnestColumn.capabilities(
|
||||
baseColumnSelectorFactory,
|
||||
unnestColumn.getOutputName()
|
||||
);
|
||||
|
||||
if (capabilities.isArray()) {
|
||||
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
|
||||
}
|
||||
if (capabilities.hasMultipleValues().isTrue()) {
|
||||
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
|
||||
}
|
||||
return capabilities;
|
||||
return baseColumnSelectorFactory.getColumnCapabilities(column);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.druid.query.filter.InDimFilter;
|
|||
import org.apache.druid.query.filter.NullFilter;
|
||||
import org.apache.druid.query.filter.RangeFilter;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.TypeSignature;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.Indexed;
|
||||
import org.apache.druid.segment.data.ListIndexed;
|
||||
|
@ -229,7 +231,7 @@ public class UnnestStorageAdapter implements StorageAdapter
|
|||
public ColumnCapabilities getColumnCapabilities(String column)
|
||||
{
|
||||
if (outputColumnName.equals(column)) {
|
||||
return unnestColumn.capabilities(baseAdapter, column);
|
||||
return computeOutputColumnCapabilities(baseAdapter, unnestColumn);
|
||||
}
|
||||
|
||||
return baseAdapter.getColumnCapabilities(column);
|
||||
|
@ -559,6 +561,34 @@ public class UnnestStorageAdapter implements StorageAdapter
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the capabilities of {@link #outputColumnName}, after unnesting.
|
||||
*/
|
||||
@Nullable
|
||||
public static ColumnCapabilities computeOutputColumnCapabilities(
|
||||
final ColumnInspector baseColumnInspector,
|
||||
final VirtualColumn unnestColumn
|
||||
)
|
||||
{
|
||||
final ColumnCapabilities capabilities = unnestColumn.capabilities(
|
||||
baseColumnInspector,
|
||||
unnestColumn.getOutputName()
|
||||
);
|
||||
|
||||
if (capabilities == null) {
|
||||
return null;
|
||||
} else {
|
||||
// Arrays are unnested as their element type. Anything else is unnested as the same type.
|
||||
final TypeSignature<ValueType> outputType =
|
||||
capabilities.isArray() ? capabilities.getElementType() : capabilities.toColumnType();
|
||||
|
||||
return ColumnCapabilitiesImpl.createDefault()
|
||||
.setType(outputType)
|
||||
.setHasMultipleValues(false)
|
||||
.setDictionaryEncoded(useDimensionCursor(capabilities));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Requirement for {@link #rewriteFilterOnUnnestColumnIfPossible}: filter must support rewrites and also must map
|
||||
* over multi-value strings. (Rather than treat them as arrays.) There isn't a method on the Filter interface that
|
||||
|
|
|
@ -194,7 +194,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
@Test
|
||||
public void test_group_of_unnest_adapters_column_capabilities()
|
||||
public void test_unnest_adapter_column_capabilities()
|
||||
{
|
||||
String colName = "multi-string1";
|
||||
List<String> columnsInTable = Arrays.asList(
|
||||
|
@ -220,7 +220,14 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
|
|||
Assert.assertEquals(capabilities.getType(), valueTypes.get(i));
|
||||
}
|
||||
assertColumnReadsIdentifier(adapter.getUnnestColumn(), colName);
|
||||
|
||||
Assert.assertEquals(
|
||||
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
|
||||
ColumnCapabilities.Capable.TRUE // passed through from dict-encoded input
|
||||
);
|
||||
Assert.assertEquals(
|
||||
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).hasMultipleValues(),
|
||||
ColumnCapabilities.Capable.FALSE
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -4092,6 +4092,54 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnnestArrayColumnsStringThenFunction()
|
||||
{
|
||||
// Regresson test for https://github.com/apache/druid/issues/16543.
|
||||
cannotVectorize();
|
||||
testQuery(
|
||||
"SELECT a || '.txt' FROM druid.arrays, UNNEST(arrayString) as unnested (a)",
|
||||
QUERY_CONTEXT_UNNEST,
|
||||
ImmutableList.of(
|
||||
newScanQueryBuilder()
|
||||
.dataSource(UnnestDataSource.create(
|
||||
new TableDataSource(CalciteTests.ARRAYS_DATASOURCE),
|
||||
expressionVirtualColumn("j0.unnest", "\"arrayString\"", ColumnType.STRING_ARRAY),
|
||||
null
|
||||
))
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.virtualColumns(expressionVirtualColumn("v0", "concat(\"j0.unnest\",'.txt')", ColumnType.STRING))
|
||||
.context(QUERY_CONTEXT_UNNEST)
|
||||
.columns(ImmutableList.of("v0"))
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{"d.txt"},
|
||||
new Object[]{"e.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"c.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"c.txt"},
|
||||
new Object[]{"d.txt"},
|
||||
new Object[]{"e.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"c.txt"},
|
||||
new Object[]{"a.txt"},
|
||||
new Object[]{"b.txt"},
|
||||
new Object[]{"c.txt"}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnnestArrayColumnsStringNulls()
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue