From 185c99deec91dba3687128121d953d06abe0ffd9 Mon Sep 17 00:00:00 2001 From: fjy Date: Thu, 28 May 2015 14:24:46 -0700 Subject: [PATCH] Add schema check for overlap between dimension and metric names --- docs/content/Ingestion.md | 9 +++++++++ .../io/druid/segment/indexing/DataSchema.java | 20 +++++++++++++++---- .../segment/indexing/DataSchemaTest.java | 20 +++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/docs/content/Ingestion.md b/docs/content/Ingestion.md index d8869a0cf55..2d2d466ffe2 100644 --- a/docs/content/Ingestion.md +++ b/docs/content/Ingestion.md @@ -184,3 +184,12 @@ Batch Ingestion: See [Batch ingestion](Batch-ingestion.html) Real-time Ingestion: See [Real-time ingestion](Realtime-ingestion.html). Batch Ingestion: See [Batch ingestion](Batch-ingestion.html) + +# Evaluating Timestamp, Dimensions and Metrics + +Druid will interpret dimensions, dimension exclusions, and metrics in the following order: + +* Any column listed in the list of dimensions is treated as a dimension. +* Any column listed in the list of dimension exclusions is excluded as a dimension. +* The timestamp column and the columns/fieldNames required by metrics are excluded as dimensions by default. +* A metric may not have the same name as a dimension; if a column is both listed as a dimension and aggregated by a metric, the metric must be given a different name. 
diff --git a/server/src/main/java/io/druid/segment/indexing/DataSchema.java b/server/src/main/java/io/druid/segment/indexing/DataSchema.java index 26d105d604b..a739c210626 100644 --- a/server/src/main/java/io/druid/segment/indexing/DataSchema.java +++ b/server/src/main/java/io/druid/segment/indexing/DataSchema.java @@ -20,7 +20,9 @@ package io.druid.segment.indexing; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.api.client.repackaged.com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; +import com.metamx.common.IAE; import com.metamx.common.logger.Logger; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.InputRowParser; @@ -70,15 +72,25 @@ public class DataSchema } } if (dimensionsSpec != null) { + final Set<String> metSet = Sets.newHashSet(); + for (AggregatorFactory aggregator : aggregators) { + metSet.add(aggregator.getName()); + } + final Set<String> dimSet = Sets.newHashSet(dimensionsSpec.getDimensions()); + final Set<String> overlap = Sets.intersection(metSet, dimSet); + if (!overlap.isEmpty()) { + throw new IAE( + "Cannot have overlapping dimensions and metrics of the same name. Please change the name of the metric. 
Overlap: %s", + overlap + ); + } + this.parser = parser.withParseSpec( parser.getParseSpec() .withDimensionsSpec( dimensionsSpec .withDimensionExclusions( - Sets.difference( - dimensionExclusions, - Sets.newHashSet(dimensionsSpec.getDimensions()) - ) + Sets.difference(dimensionExclusions, dimSet) ) ) ); diff --git a/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java b/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java index 99e24b40bb7..ae43d852df9 100644 --- a/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java +++ b/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java @@ -19,6 +19,7 @@ package io.druid.segment.indexing; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.metamx.common.IAE; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.JSONParseSpec; import io.druid.data.input.impl.StringInputRowParser; @@ -80,4 +81,23 @@ public class DataSchemaTest schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions() ); } + + @Test(expected = IAE.class) + public void testOverlapMetricNameAndDim() throws Exception + { + DataSchema schema = new DataSchema( + "test", + new StringInputRowParser( + new JSONParseSpec( + new TimestampSpec("time", "auto", null), + new DimensionsSpec(ImmutableList.of("time", "dimA", "dimB", "metric1"), ImmutableList.of("dimC"), null) + ) + ), + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("metric1", "col1"), + new DoubleSumAggregatorFactory("metric2", "col2"), + }, + new ArbitraryGranularitySpec(QueryGranularity.DAY, ImmutableList.of(Interval.parse("2014/2015"))) + ); + } }