mirror of https://github.com/apache/druid.git
SQL: Fix too-strict check in SortProject. (#6403)
The "Duplicate field name" check on inputRowSignature is too strict: it is actually fine for a row signature to have the same field name twice. This happens when the same expression is selected twice, and both selections map to the same Druid object (dimension, aggregator, etc.). I did not succeed in writing a test that triggers this, but I did see it occur in production for a complex query with hundreds of aggregators.
This commit is contained in:
parent
63ba7f7bec
commit
3922582d8c
|
@ -154,7 +154,7 @@ public class DruidQuery
|
|||
sortingInputRowSignature = sourceRowSignature;
|
||||
}
|
||||
|
||||
this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature, grouping);
|
||||
this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature);
|
||||
|
||||
// outputRowSignature is used only for scan and select query, and thus sort and grouping must be null
|
||||
this.outputRowSignature = sortProject == null ? sortingInputRowSignature : sortProject.getOutputRowSignature();
|
||||
|
@ -328,8 +328,7 @@ public class DruidQuery
|
|||
private SortProject computeSortProject(
|
||||
PartialDruidQuery partialQuery,
|
||||
PlannerContext plannerContext,
|
||||
RowSignature sortingInputRowSignature,
|
||||
Grouping grouping
|
||||
RowSignature sortingInputRowSignature
|
||||
)
|
||||
{
|
||||
final Project sortProject = partialQuery.getSortProject();
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SortProject
|
||||
{
|
||||
|
@ -44,26 +45,21 @@ public class SortProject
|
|||
this.postAggregators = Preconditions.checkNotNull(postAggregators, "postAggregators");
|
||||
this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature");
|
||||
|
||||
// Verify no collisions.
|
||||
final Set<String> seen = new HashSet<>();
|
||||
inputRowSignature.getRowOrder().forEach(field -> {
|
||||
if (!seen.add(field)) {
|
||||
throw new ISE("Duplicate field name: %s", field);
|
||||
}
|
||||
});
|
||||
final Set<String> inputColumnNames = new HashSet<>(inputRowSignature.getRowOrder());
|
||||
final Set<String> postAggregatorNames = postAggregators.stream()
|
||||
.map(PostAggregator::getName)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
for (PostAggregator postAggregator : postAggregators) {
|
||||
if (postAggregator == null) {
|
||||
throw new ISE("aggregation[%s] is not a postAggregator", postAggregator);
|
||||
}
|
||||
if (!seen.add(postAggregator.getName())) {
|
||||
throw new ISE("Duplicate field name: %s", postAggregator.getName());
|
||||
// Verify no collisions between inputs and outputs.
|
||||
for (String postAggregatorName : postAggregatorNames) {
|
||||
if (inputColumnNames.contains(postAggregatorName)) {
|
||||
throw new ISE("Duplicate field name: %s", postAggregatorName);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that items in the output signature exist.
|
||||
outputRowSignature.getRowOrder().forEach(field -> {
|
||||
if (!seen.contains(field)) {
|
||||
if (!inputColumnNames.contains(field) && !postAggregatorNames.contains(field)) {
|
||||
throw new ISE("Missing field in rowOrder: %s", field);
|
||||
}
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue