Merge pull request #842 from metamx/fix-timestamp-column-exclusion

fix dimension exclusions for timestamp and aggs
This commit is contained in:
Fangjin Yang 2014-11-12 09:44:50 -07:00
commit 58260165ad
2 changed files with 105 additions and 7 deletions

View File

@ -22,7 +22,9 @@ package io.druid.segment.indexing;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.InputRowParser; import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.indexing.granularity.GranularitySpec; import io.druid.segment.indexing.granularity.GranularitySpec;
import io.druid.segment.indexing.granularity.UniformGranularitySpec; import io.druid.segment.indexing.granularity.UniformGranularitySpec;
@ -50,19 +52,28 @@ public class DataSchema
final Set<String> dimensionExclusions = Sets.newHashSet(); final Set<String> dimensionExclusions = Sets.newHashSet();
for (AggregatorFactory aggregator : aggregators) { for (AggregatorFactory aggregator : aggregators) {
dimensionExclusions.add(aggregator.getName()); dimensionExclusions.addAll(aggregator.requiredFields());
} }
if (parser != null && parser.getParseSpec() != null) { if (parser != null && parser.getParseSpec() != null) {
if (parser.getParseSpec().getTimestampSpec() != null) { final DimensionsSpec dimensionsSpec = parser.getParseSpec().getDimensionsSpec();
dimensionExclusions.add(parser.getParseSpec().getTimestampSpec().getTimestampColumn()); final TimestampSpec timestampSpec = parser.getParseSpec().getTimestampSpec();
// exclude timestamp from dimensions by default, unless explicitly included in the list of dimensions
if (timestampSpec != null) {
final String timestampColumn = timestampSpec.getTimestampColumn();
if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensions().contains(timestampColumn))) {
dimensionExclusions.add(timestampColumn);
}
} }
if (parser.getParseSpec().getDimensionsSpec() != null) { if (dimensionsSpec != null) {
this.parser = parser.withParseSpec( this.parser = parser.withParseSpec(
parser.getParseSpec() parser.getParseSpec()
.withDimensionsSpec( .withDimensionsSpec(
parser.getParseSpec() dimensionsSpec
.getDimensionsSpec() .withDimensionExclusions(
.withDimensionExclusions(dimensionExclusions) Sets.difference(dimensionExclusions,
Sets.newHashSet(dimensionsSpec.getDimensions()))
)
) )
); );
} else { } else {

View File

@ -0,0 +1,87 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.segment.indexing;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.JSONParseSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.granularity.QueryGranularity;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
import org.junit.Assert;
import org.joda.time.Interval;
import org.junit.Test;
public class DataSchemaTest
{
  /**
   * Builds a DataSchema whose parser uses a "time" timestamp column, the given
   * dimensions spec, and two doubleSum aggregators reading "col1" and "col2".
   * Shared by both tests so each one varies only the DimensionsSpec under test.
   */
  private static DataSchema schemaWithDimensions(DimensionsSpec dimensionsSpec)
  {
    return new DataSchema(
        "test",
        new StringInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("time", "auto"),
                dimensionsSpec
            ),
            null, null, null, null
        ),
        new AggregatorFactory[]{
            new DoubleSumAggregatorFactory("metric1", "col1"),
            new DoubleSumAggregatorFactory("metric2", "col2"),
        },
        new ArbitraryGranularitySpec(QueryGranularity.DAY, ImmutableList.of(Interval.parse("2014/2015")))
    );
  }

  @Test
  public void testDefaultExclusions() throws Exception
  {
    // With no explicit mention of the timestamp or aggregator input columns in the
    // dimension list, all of them should be excluded from dimensions by default.
    final DataSchema schema = schemaWithDimensions(
        new DimensionsSpec(ImmutableList.of("dimB", "dimA"), null, null)
    );
    Assert.assertEquals(
        ImmutableSet.of("time", "col1", "col2"),
        schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
    );
  }

  @Test
  public void testExplicitInclude() throws Exception
  {
    // Columns explicitly listed as dimensions ("time", "col2") must NOT be excluded,
    // even though they would be by default; the user-supplied exclusion "dimC" and
    // the remaining aggregator input "col1" are the only exclusions left.
    final DataSchema schema = schemaWithDimensions(
        new DimensionsSpec(ImmutableList.of("time", "dimA", "dimB", "col2"), ImmutableList.of("dimC"), null)
    );
    Assert.assertEquals(
        ImmutableSet.of("dimC", "col1"),
        schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
    );
  }
}