fix ColumnType to RelDataType conversion for nested arrays (#16138)

* fix ColumnType to RelDataType conversion for nested arrays

* fix test
This commit is contained in:
Clint Wylie 2024-03-18 23:34:08 -07:00 committed by GitHub
parent c7823bca98
commit 5afd5c41a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 208 additions and 42 deletions

View File

@ -96,8 +96,8 @@ public class RowSignatures
}
/**
* Returns a Calcite RelDataType corresponding to a row signature. It will typecast __time column to TIMESTAMP
* irrespective of the type present in the row signature
* Returns a Calcite {@link RelDataType} corresponding to a {@link RowSignature}. The __time column is always typecast
* to TIMESTAMP, irrespective of the type present in the row signature.
*/
public static RelDataType toRelDataType(final RowSignature rowSignature, final RelDataTypeFactory typeFactory)
{
@ -105,8 +105,8 @@ public class RowSignatures
}
/**
* Returns a Calcite RelDataType corresponding to a row signature.
* For columns that are named "__time", it automatically casts it to TIMESTAMP if typecastTimeColumn is set to true
* Returns a Calcite {@link RelDataType} corresponding to a {@link RowSignature}. Columns named "__time" are
* automatically cast to TIMESTAMP if typecastTimeColumn is set to true.
*/
public static RelDataType toRelDataType(
final RowSignature rowSignature,
@ -126,6 +126,25 @@ public class RowSignatures
rowSignature.getColumnType(columnName)
.orElseThrow(() -> new ISE("Encountered null type for column[%s]", columnName));
type = columnTypeToRelDataType(typeFactory, columnType, nullNumeric);
}
builder.add(columnName, type);
}
return builder.build();
}
/**
* Returns a Calcite {@link RelDataType} corresponding to a {@link ColumnType}
*/
public static RelDataType columnTypeToRelDataType(
RelDataTypeFactory typeFactory,
ColumnType columnType,
boolean nullNumeric
)
{
final RelDataType type;
switch (columnType.getType()) {
case STRING:
// Note that there is no attempt here to handle multi-value in any special way. Maybe one day...
@ -141,35 +160,22 @@ public class RowSignatures
type = Calcites.createSqlTypeWithNullability(typeFactory, SqlTypeName.DOUBLE, nullNumeric);
break;
case ARRAY:
switch (columnType.getElementType().getType()) {
case STRING:
type = Calcites.createSqlArrayTypeWithNullability(typeFactory, SqlTypeName.VARCHAR, true);
break;
case LONG:
type = Calcites.createSqlArrayTypeWithNullability(typeFactory, SqlTypeName.BIGINT, nullNumeric);
break;
case DOUBLE:
type = Calcites.createSqlArrayTypeWithNullability(typeFactory, SqlTypeName.DOUBLE, nullNumeric);
break;
case FLOAT:
type = Calcites.createSqlArrayTypeWithNullability(typeFactory, SqlTypeName.FLOAT, nullNumeric);
break;
default:
throw new ISE("valueType[%s] not translatable", columnType);
}
final RelDataType elementType = columnTypeToRelDataType(
typeFactory,
(ColumnType) columnType.getElementType(),
nullNumeric
);
type = typeFactory.createTypeWithNullability(
typeFactory.createArrayType(elementType, -1),
true
);
break;
case COMPLEX:
type = makeComplexType(typeFactory, columnType, true);
break;
default:
throw new ISE("valueType[%s] not translatable", columnType);
}
}
builder.add(columnName, type);
}
return builder.build();
} return type;
}
/**

View File

@ -7327,4 +7327,164 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
)
);
}
/**
 * Plans and runs a GROUP BY over an inline VALUES table in which ARRAY_AGG collects {@code ARRAY[1, c2]} values
 * (i.e. an array of arrays) and ARRAY_PREPEND is applied to that aggregate in both the select list and the HAVING
 * clause. Asserts the expected native group-by query and the three expected result rows.
 */
@Test
public void testGroupByNestedArrayInline()
{
  cannotVectorize();
  // msq does not support nested arrays currently
  msqIncompatible();

  final String sql =
      "SELECT c1, ARRAY_PREPEND('1', ARRAY_AGG(ARRAY[1,c2], 100000)) c5 \n"
      + "FROM (VALUES (1,1),(2,2),(3,3)) t(c1,c2)\n"
      + "GROUP BY 1 \n"
      + "HAVING ARRAY_PREPEND('1', ARRAY_AGG(ARRAY[1,c2], 100000)) <> ARRAY_PREPEND('0', ARRAY_AGG(ARRAY[1,c2], 100000))";

  // The VALUES clause is expected to be planned as an inline datasource with two LONG columns.
  final RowSignature inlineSignature = RowSignature.builder()
                                                   .add("c1", ColumnType.LONG)
                                                   .add("c2", ColumnType.LONG)
                                                   .build();
  final InlineDataSource inlineRows = InlineDataSource.fromIterable(
      ImmutableList.of(
          new Object[]{1L, 1L},
          new Object[]{2L, 2L},
          new Object[]{3L, 3L}
      ),
      inlineSignature
  );

  // ARRAY_AGG over the LONG_ARRAY virtual column "v0"; accumulator and combiner start as ARRAY<ARRAY<LONG>>.
  final ExpressionLambdaAggregatorFactory nestedArrayAgg = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("v0"),
      "__acc",
      "ARRAY<ARRAY<LONG>>[]",
      "ARRAY<ARRAY<LONG>>[]",
      true,
      true,
      false,
      "array_append(\"__acc\", \"v0\")",
      "array_concat(\"__acc\", \"a0\")",
      null,
      null,
      HumanReadableBytes.valueOf(100000),
      TestExprMacroTable.INSTANCE
  );

  final DimFilterHavingSpec having = new DimFilterHavingSpec(
      expressionFilter("(array_prepend('1',\"a0\") != array_prepend('0',\"a0\"))"),
      true
  );

  final GroupByQuery expectedQuery =
      GroupByQuery.builder()
                  .setDataSource(inlineRows)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setVirtualColumns(
                      expressionVirtualColumn("v0", "array(1,\"c2\")", ColumnType.LONG_ARRAY)
                  )
                  .setDimensions(new DefaultDimensionSpec("c1", "d0", ColumnType.LONG))
                  .setAggregatorSpecs(nestedArrayAgg)
                  .setPostAggregatorSpecs(
                      expressionPostAgg(
                          "p0",
                          "array_prepend('1',\"a0\")",
                          ColumnType.ofArray(ColumnType.LONG_ARRAY)
                      )
                  )
                  .setHavingSpec(having)
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{1, ImmutableList.of(ImmutableList.of(1L), ImmutableList.of(1L, 1L))},
      new Object[]{2, ImmutableList.of(ImmutableList.of(1L), ImmutableList.of(1L, 2L))},
      new Object[]{3, ImmutableList.of(ImmutableList.of(1L), ImmutableList.of(1L, 3L))}
  );

  testQuery(
      sql,
      QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
      ImmutableList.of(expectedQuery),
      expectedRows
  );
}
/**
 * Wraps a GROUP BY over an inline VALUES table, whose HAVING clause compares two ARRAY_PREPEND results over an
 * ARRAY_AGG of {@code ARRAY[1, c2]} values (an array of arrays), in an outer {@code COUNT(*)}. Asserts that the
 * expected native plan is a count group-by on top of the inner group-by query, and that the single result row is 3.
 */
@Test
public void testGroupByNestedArrayInlineCount()
{
  cannotVectorize();
  // msq does not support nested arrays currently
  msqIncompatible();

  final String sql =
      "SELECT COUNT(*) c FROM (\n"
      + "SELECT c1, ARRAY_PREPEND('1', ARRAY_AGG(ARRAY[1,c2], 100000)) c5 \n"
      + "FROM (VALUES (1,1),(2,2),(3,3)) t(c1,c2)\n"
      + "GROUP BY 1 \n"
      + "HAVING ARRAY_PREPEND('1', ARRAY_AGG(ARRAY[1,c2], 100000)) <> ARRAY_PREPEND('0', ARRAY_AGG(ARRAY[1,c2], 100000))\n"
      + ")";

  // The VALUES clause is expected to be planned as an inline datasource with two LONG columns.
  final RowSignature inlineSignature = RowSignature.builder()
                                                   .add("c1", ColumnType.LONG)
                                                   .add("c2", ColumnType.LONG)
                                                   .build();
  final InlineDataSource inlineRows = InlineDataSource.fromIterable(
      ImmutableList.of(
          new Object[]{1L, 1L},
          new Object[]{2L, 2L},
          new Object[]{3L, 3L}
      ),
      inlineSignature
  );

  // ARRAY_AGG over the LONG_ARRAY virtual column "v0"; accumulator and combiner start as ARRAY<ARRAY<LONG>>.
  final ExpressionLambdaAggregatorFactory nestedArrayAgg = new ExpressionLambdaAggregatorFactory(
      "a0",
      ImmutableSet.of("v0"),
      "__acc",
      "ARRAY<ARRAY<LONG>>[]",
      "ARRAY<ARRAY<LONG>>[]",
      true,
      true,
      false,
      "array_append(\"__acc\", \"v0\")",
      "array_concat(\"__acc\", \"a0\")",
      null,
      null,
      HumanReadableBytes.valueOf(100000),
      TestExprMacroTable.INSTANCE
  );

  final DimFilterHavingSpec having = new DimFilterHavingSpec(
      expressionFilter(
          "(array_prepend('1',\"a0\") != array_prepend('0',\"a0\"))"),
      true
  );

  // Inner query: the grouped subquery. Unlike the select-list form, no post-aggregator is expected here.
  final GroupByQuery innerQuery =
      GroupByQuery.builder()
                  .setDataSource(inlineRows)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setVirtualColumns(
                      expressionVirtualColumn("v0", "array(1,\"c2\")", ColumnType.LONG_ARRAY)
                  )
                  .setDimensions(new DefaultDimensionSpec("c1", "d0", ColumnType.LONG))
                  .setAggregatorSpecs(nestedArrayAgg)
                  .setHavingSpec(having)
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  // Outer query: counts the rows that survive the inner HAVING filter.
  final GroupByQuery outerQuery =
      GroupByQuery.builder()
                  .setDataSource(innerQuery)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setAggregatorSpecs(new CountAggregatorFactory("_a0"))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{3L}
  );

  testQuery(
      sql,
      QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
      ImmutableList.of(outerQuery),
      expectedRows
  );
}
}