Fix grouping aggregator when one of the dimensions is a simple extraction (#15421)

This PR fixes an issue where the grouping aggregator wrongly assumes that a key dimension is a virtual column and assigns the wrong name to it. This results in a mismatch between the dimensions that the grouping aggregator sees and the dimension names that rows are aggregated on. As a result, the grouping aggregator generates wrong results.
This commit is contained in:
Abhishek Agarwal 2023-11-24 13:15:07 +05:30 committed by GitHub
parent 4ab0b71513
commit 3113e7b350
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 96 additions and 2 deletions

View File

@ -116,10 +116,13 @@ public class GroupingSqlAggregator implements SqlAggregator
return expression.getDirectColumn();
}
String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
if (expression.isSimpleExtraction()) {
return expression.getSimpleExtraction().getColumn();
}
return virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
expression,
node.getType()
);
return virtualColumn;
}
}

View File

@ -2580,6 +2580,97 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
/**
 * Plans an exact COUNT(DISTINCT ...) with grouping sets enabled when the GROUP BY key is a LOOKUP over dim1.
 *
 * The expected native plan is a groupBy over a groupBy: the inner query groups on the lookup extraction of
 * "dim1" (output "d0") and on "dim2" (output "d1") with subtotals, and its grouping aggregator is expected to be
 * built from the column names "dim1" and "dim2".
 */
@Test
public void testExactCountDistinctLookup()
{
  msqIncompatible();
  final String sqlQuery = "SELECT CAST(LOOKUP(dim1, 'lookyloo') AS VARCHAR), "
                          + "COUNT(DISTINCT foo.dim2), "
                          + "SUM(foo.cnt) FROM druid.foo "
                          + "GROUP BY 1";
  // ExtractionDimensionSpec cannot be vectorized
  cannotVectorize();
  requireMergeBuffers(3);

  // Planner configuration: the NO_HLL config with the grouping-set override for exact distinct set to "true".
  final PlannerConfig plannerConfig = PLANNER_CONFIG_NO_HLL.withOverrides(
      ImmutableMap.of(PlannerConfig.CTX_KEY_USE_GROUPING_SET_FOR_EXACT_DISTINCT, "true")
  );

  // Inner query: groups on (lookup(dim1), dim2) and on (lookup(dim1)) through the subtotals spec.
  final ExtractionDimensionSpec lookupDimension = new ExtractionDimensionSpec(
      "dim1",
      "d0",
      ColumnType.STRING,
      new RegisteredLookupExtractionFn(null, "lookyloo", false, null, null, true)
  );
  final GroupByQuery innerQuery =
      GroupByQuery.builder()
                  .setDataSource(CalciteTests.DATASOURCE1)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(
                      dimensions(
                          lookupDimension,
                          new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING)
                      )
                  )
                  .setAggregatorSpecs(
                      aggregators(
                          new LongSumAggregatorFactory("a0", "cnt"),
                          new GroupingAggregatorFactory("a1", Arrays.asList("dim1", "dim2"))
                      )
                  )
                  .setSubtotalsSpec(
                      ImmutableList.of(
                          ImmutableList.of("d0", "d1"),
                          ImmutableList.of("d0")
                      )
                  )
                  .build();

  // Outer query: re-groups on d0 and selects rows of each grouping set through filters on the "a1" column.
  final GroupByQuery outerQuery =
      GroupByQuery.builder()
                  .setDataSource(new QueryDataSource(innerQuery))
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(new DefaultDimensionSpec("d0", "_d0", ColumnType.STRING))
                  .setAggregatorSpecs(
                      aggregators(
                          new FilteredAggregatorFactory(
                              new CountAggregatorFactory("_a0"),
                              and(
                                  notNull("d1"),
                                  equality("a1", 0L, ColumnType.LONG)
                              )
                          ),
                          new FilteredAggregatorFactory(
                              new LongMinAggregatorFactory("_a1", "a0"),
                              equality("a1", 1L, ColumnType.LONG)
                          )
                      )
                  )
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  // The distinct count for the first row depends on the null-handling mode.
  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{NullHandling.defaultStringValue(), NullHandling.replaceWithDefault() ? 2L : 3L, 5L},
      new Object[]{"xabc", 0L, 1L}
  );

  testQuery(
      plannerConfig,
      sqlQuery,
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(outerQuery),
      expectedRows
  );
}
@Test
public void testHavingOnFloatSum()
{