Merge pull request #474 from metamx/groupby-dimextractionfn

support dimension extraction functions in group by
This commit is contained in:
fjy 2014-04-09 18:23:06 -06:00
commit bfeed26765
2 changed files with 51 additions and 1 deletions

View File

@ -44,6 +44,7 @@ import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.BufferAggregator; import io.druid.query.aggregation.BufferAggregator;
import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.PostAggregator;
import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.DimensionSpec;
import io.druid.query.extraction.DimExtractionFn;
import io.druid.segment.Cursor; import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector; import io.druid.segment.DimensionSelector;
import io.druid.segment.StorageAdapter; import io.druid.segment.StorageAdapter;
@ -398,11 +399,16 @@ public class GroupByQueryEngine
ByteBuffer keyBuffer = input.getKey().duplicate(); ByteBuffer keyBuffer = input.getKey().duplicate();
for (int i = 0; i < dimensions.size(); ++i) { for (int i = 0; i < dimensions.size(); ++i) {
final DimensionSelector dimSelector = dimensions.get(i); final DimensionSelector dimSelector = dimensions.get(i);
final DimExtractionFn fn = dimensionSpecs.get(i).getDimExtractionFn();
final int dimVal = keyBuffer.getInt(); final int dimVal = keyBuffer.getInt();
if (dimSelector.getValueCardinality() != dimVal) { if (dimSelector.getValueCardinality() != dimVal) {
if(fn != null) {
theEvent.put(dimNames.get(i), fn.apply(dimSelector.lookupName(dimVal)));
} else {
theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal)); theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
} }
} }
}
int position = input.getValue(); int position = input.getValue();
for (int i = 0; i < aggregators.length; ++i) { for (int i = 0; i < aggregators.length; ++i) {

View File

@ -45,6 +45,9 @@ import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.aggregation.MaxAggregatorFactory; import io.druid.query.aggregation.MaxAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.DimensionSpec;
import io.druid.query.dimension.ExtractionDimensionSpec;
import io.druid.query.extraction.PartialDimExtractionFn;
import io.druid.query.extraction.RegexDimExtractionFn;
import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.JavaScriptDimFilter;
import io.druid.query.filter.RegexDimFilter; import io.druid.query.filter.RegexDimFilter;
import io.druid.query.groupby.having.EqualToHavingSpec; import io.druid.query.groupby.having.EqualToHavingSpec;
@ -178,6 +181,47 @@ public class GroupByQueryRunnerTest
TestHelper.assertExpectedObjects(expectedResults, results, ""); TestHelper.assertExpectedObjects(expectedResults, results, "");
} }
@Test
public void testGroupByWithDimExtractionFn()
{
GroupByQuery query = GroupByQuery
.builder()
.setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new ExtractionDimensionSpec("quality", "alias", new RegexDimExtractionFn("(\\w{1})"))))
.setAggregatorSpecs(
Arrays.<AggregatorFactory>asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.build();
List<Row> expectedResults = Arrays.asList(
createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L),
createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L),
createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
);
Iterable<Row> results = runQuery(query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test @Test
public void testGroupByWithTimeZone() public void testGroupByWithTimeZone()
{ {