Merge pull request #1399 from druid-io/overlap-check

Add schema check for overlap between dimension and metric names
This commit is contained in:
Xavier Léauté 2015-05-29 09:39:22 -07:00
commit 20fdb627d9
3 changed files with 45 additions and 4 deletions

View File

@ -184,3 +184,12 @@ Batch Ingestion: See [Batch ingestion](Batch-ingestion.html)
Real-time Ingestion: See [Real-time ingestion](Realtime-ingestion.html).
Batch Ingestion: See [Batch ingestion](Batch-ingestion.html)
# Evaluating Timestamp, Dimensions and Metrics
Druid will interpret dimensions, dimension exclusions, and metrics in the following order:
* Any column listed in the list of dimensions is treated as a dimension.
* Any column listed in the list of dimension exclusions is excluded as a dimension.
* The timestamp column and columns/fieldNames required by metrics are excluded by default.
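* A metric may not have the same name as a dimension. If a column is needed both as a dimension and as the input to a metric, give the metric a name that differs from the dimension name; a schema with overlapping names is rejected.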

View File

@ -20,7 +20,9 @@ package io.druid.segment.indexing;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.api.client.repackaged.com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.metamx.common.IAE;
import com.metamx.common.logger.Logger;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.InputRowParser;
@ -70,15 +72,25 @@ public class DataSchema
}
}
if (dimensionsSpec != null) {
final Set<String> metSet = Sets.newHashSet();
for (AggregatorFactory aggregator : aggregators) {
metSet.add(aggregator.getName());
}
final Set<String> dimSet = Sets.newHashSet(dimensionsSpec.getDimensions());
final Set<String> overlap = Sets.intersection(metSet, dimSet);
if (!overlap.isEmpty()) {
throw new IAE(
"Cannot have overlapping dimensions and metrics of the same name. Please change the name of the metric. Overlap: %s",
overlap
);
}
this.parser = parser.withParseSpec(
parser.getParseSpec()
.withDimensionsSpec(
dimensionsSpec
.withDimensionExclusions(
Sets.difference(
dimensionExclusions,
Sets.newHashSet(dimensionsSpec.getDimensions())
)
Sets.difference(dimensionExclusions, dimSet)
)
)
);

View File

@ -19,6 +19,7 @@ package io.druid.segment.indexing;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.metamx.common.IAE;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.JSONParseSpec;
import io.druid.data.input.impl.StringInputRowParser;
@ -80,4 +81,23 @@ public class DataSchemaTest
schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
);
}
/**
 * Builds a {@link DataSchema} in which the name "metric1" appears both in the
 * dimension list and as the name of an aggregator. The test passes only if
 * an {@link IAE} is thrown while the objects below are being constructed.
 */
@Test(expected = IAE.class)
public void testOverlapMetricNameAndDim() throws Exception
{
  // Parse spec pieces: "metric1" is deliberately listed among the dimensions.
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      ImmutableList.of("time", "dimA", "dimB", "metric1"),
      ImmutableList.of("dimC"),
      null
  );
  final StringInputRowParser rowParser = new StringInputRowParser(
      new JSONParseSpec(timestampSpec, dimensionsSpec)
  );

  // The first aggregator reuses the dimension name "metric1"; the second does not collide.
  final AggregatorFactory[] metrics = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("metric1", "col1"),
      new DoubleSumAggregatorFactory("metric2", "col2")
  };

  final ArbitraryGranularitySpec granularitySpec = new ArbitraryGranularitySpec(
      QueryGranularity.DAY,
      ImmutableList.of(Interval.parse("2014/2015"))
  );

  // The constructed instance is never used; only the expected exception matters.
  new DataSchema("test", rowParser, metrics, granularitySpec);
}
}