SQL: Fix too-strict check in SortProject. (#6403)

The "Duplicate field name" check on inputRowSignature is too strict:
it is actually fine for a row signature to have the same field name
twice. It happens when the same expression is selected twice, and
both selections map to the same Druid object (dimension, aggregator,
etc).

I did not succeed in writing a test that triggers this, but I did see
it occur in production for a complex query with hundreds of aggregators.
This commit is contained in:
Gian Merlino 2018-09-29 13:54:34 -07:00 committed by GitHub
parent 63ba7f7bec
commit 3922582d8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 17 deletions

View File

@ -154,7 +154,7 @@ public class DruidQuery
sortingInputRowSignature = sourceRowSignature; sortingInputRowSignature = sourceRowSignature;
} }
this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature, grouping); this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature);
// outputRowSignature is used only for scan and select query, and thus sort and grouping must be null // outputRowSignature is used only for scan and select query, and thus sort and grouping must be null
this.outputRowSignature = sortProject == null ? sortingInputRowSignature : sortProject.getOutputRowSignature(); this.outputRowSignature = sortProject == null ? sortingInputRowSignature : sortProject.getOutputRowSignature();
@ -328,8 +328,7 @@ public class DruidQuery
private SortProject computeSortProject( private SortProject computeSortProject(
PartialDruidQuery partialQuery, PartialDruidQuery partialQuery,
PlannerContext plannerContext, PlannerContext plannerContext,
RowSignature sortingInputRowSignature, RowSignature sortingInputRowSignature
Grouping grouping
) )
{ {
final Project sortProject = partialQuery.getSortProject(); final Project sortProject = partialQuery.getSortProject();

View File

@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
public class SortProject public class SortProject
{ {
@ -44,26 +45,21 @@ public class SortProject
this.postAggregators = Preconditions.checkNotNull(postAggregators, "postAggregators"); this.postAggregators = Preconditions.checkNotNull(postAggregators, "postAggregators");
this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature"); this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature");
// Verify no collisions. final Set<String> inputColumnNames = new HashSet<>(inputRowSignature.getRowOrder());
final Set<String> seen = new HashSet<>(); final Set<String> postAggregatorNames = postAggregators.stream()
inputRowSignature.getRowOrder().forEach(field -> { .map(PostAggregator::getName)
if (!seen.add(field)) { .collect(Collectors.toSet());
throw new ISE("Duplicate field name: %s", field);
}
});
for (PostAggregator postAggregator : postAggregators) { // Verify no collisions between inputs and outputs.
if (postAggregator == null) { for (String postAggregatorName : postAggregatorNames) {
throw new ISE("aggregation[%s] is not a postAggregator", postAggregator); if (inputColumnNames.contains(postAggregatorName)) {
} throw new ISE("Duplicate field name: %s", postAggregatorName);
if (!seen.add(postAggregator.getName())) {
throw new ISE("Duplicate field name: %s", postAggregator.getName());
} }
} }
// Verify that items in the output signature exist. // Verify that items in the output signature exist.
outputRowSignature.getRowOrder().forEach(field -> { outputRowSignature.getRowOrder().forEach(field -> {
if (!seen.contains(field)) { if (!inputColumnNames.contains(field) && !postAggregatorNames.contains(field)) {
throw new ISE("Missing field in rowOrder: %s", field); throw new ISE("Missing field in rowOrder: %s", field);
} }
}); });