SQL: Fix too-strict check in SortProject. (#6403)

The "Duplicate field name" check on inputRowSignature is too strict:
it is actually fine for a row signature to have the same field name
twice. It happens when the same expression is selected twice, and
both selections map to the same Druid object (dimension, aggregator,
etc).

I did not succeed in writing a test that triggers this, but I did see
it occur in production for a complex query with hundreds of aggregators.
This commit is contained in:
Gian Merlino 2018-09-29 13:54:34 -07:00 committed by GitHub
parent 63ba7f7bec
commit 3922582d8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 17 deletions

View File

@ -154,7 +154,7 @@ public class DruidQuery
sortingInputRowSignature = sourceRowSignature;
}
this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature, grouping);
this.sortProject = computeSortProject(partialQuery, plannerContext, sortingInputRowSignature);
// outputRowSignature is used only for scan and select query, and thus sort and grouping must be null
this.outputRowSignature = sortProject == null ? sortingInputRowSignature : sortProject.getOutputRowSignature();
@ -328,8 +328,7 @@ public class DruidQuery
private SortProject computeSortProject(
PartialDruidQuery partialQuery,
PlannerContext plannerContext,
RowSignature sortingInputRowSignature,
Grouping grouping
RowSignature sortingInputRowSignature
)
{
final Project sortProject = partialQuery.getSortProject();

View File

@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
public class SortProject
{
@ -44,26 +45,21 @@ public class SortProject
this.postAggregators = Preconditions.checkNotNull(postAggregators, "postAggregators");
this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature");
// Verify no collisions.
final Set<String> seen = new HashSet<>();
inputRowSignature.getRowOrder().forEach(field -> {
if (!seen.add(field)) {
throw new ISE("Duplicate field name: %s", field);
}
});
final Set<String> inputColumnNames = new HashSet<>(inputRowSignature.getRowOrder());
final Set<String> postAggregatorNames = postAggregators.stream()
.map(PostAggregator::getName)
.collect(Collectors.toSet());
for (PostAggregator postAggregator : postAggregators) {
if (postAggregator == null) {
throw new ISE("aggregation[%s] is not a postAggregator", postAggregator);
}
if (!seen.add(postAggregator.getName())) {
throw new ISE("Duplicate field name: %s", postAggregator.getName());
// Verify no collisions between inputs and outputs.
for (String postAggregatorName : postAggregatorNames) {
if (inputColumnNames.contains(postAggregatorName)) {
throw new ISE("Duplicate field name: %s", postAggregatorName);
}
}
// Verify that items in the output signature exist.
outputRowSignature.getRowOrder().forEach(field -> {
if (!seen.contains(field)) {
if (!inputColumnNames.contains(field) && !postAggregatorNames.contains(field)) {
throw new ISE("Missing field in rowOrder: %s", field);
}
});