js/cardinality/hyperUnique happy groupBy family

Xavier Léauté 2014-06-12 15:16:07 -07:00
parent fd47efd59e
commit bfa3caa589
5 changed files with 61 additions and 38 deletions

View File

@@ -73,12 +73,13 @@ public class HyperUniquesAggregatorFactory implements AggregatorFactory
       return Aggregators.noopAggregator();
     }
-    if (HyperLogLogCollector.class.isAssignableFrom(selector.classOfObject())) {
+    final Class classOfObject = selector.classOfObject();
+    if (classOfObject.equals(Object.class) || HyperLogLogCollector.class.isAssignableFrom(classOfObject)) {
       return new HyperUniquesAggregator(name, selector);
     }
     throw new IAE(
-        "Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, selector.classOfObject()
+        "Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject
     );
   }
@@ -91,12 +92,13 @@ public class HyperUniquesAggregatorFactory implements AggregatorFactory
       return Aggregators.noopBufferAggregator();
     }
-    if (HyperLogLogCollector.class.isAssignableFrom(selector.classOfObject())) {
+    final Class classOfObject = selector.classOfObject();
+    if (classOfObject.equals(Object.class) || HyperLogLogCollector.class.isAssignableFrom(classOfObject)) {
       return new HyperUniquesBufferAggregator(selector);
     }
     throw new IAE(
-        "Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, selector.classOfObject()
+        "Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject
     );
   }

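Side note: the relaxed check above means the factory also accepts selectors that only advertise Object as their value class, which is what the incremental index's raw column selector reports in the groupBy path (see the IncrementalIndex hunks below). A minimal, self-contained sketch of the guard, with a stand-in Collector class in place of the real HyperLogLogCollector:

// Stand-alone sketch; Collector is a placeholder, not Druid's HyperLogLogCollector.
public class SelectorTypeCheckSketch
{
  static class Collector {}

  static boolean isCompatibleSelectorClass(Class<?> classOfObject)
  {
    // Accept untyped (Object) selectors as well as selectors typed to the collector.
    return classOfObject.equals(Object.class)
           || Collector.class.isAssignableFrom(classOfObject);
  }

  public static void main(String[] args)
  {
    System.out.println(isCompatibleSelectorClass(Object.class));    // true: raw, untyped selector
    System.out.println(isCompatibleSelectorClass(Collector.class)); // true: typed selector
    System.out.println(isCompatibleSelectorClass(String.class));    // false: the factory throws IAE here
  }
}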
View File

@@ -24,12 +24,14 @@ import com.google.common.collect.Lists;
 import com.metamx.common.ISE;
 import com.metamx.common.Pair;
 import com.metamx.common.guava.Accumulator;
+import io.druid.data.input.MapBasedRow;
 import io.druid.data.input.Row;
 import io.druid.data.input.Rows;
 import io.druid.granularity.QueryGranularity;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.dimension.DimensionSpec;
 import io.druid.segment.incremental.IncrementalIndex;
+import io.druid.segment.incremental.IncrementalIndexSchema;
 import javax.annotation.Nullable;
 import java.util.List;

View File

@@ -248,7 +248,7 @@ public class IncrementalIndex implements Iterable<Row>
     final ObjectColumnSelector<Object> rawColumnSelector = new ObjectColumnSelector<Object>()
     {
       @Override
-      public Class<Object> classOfObject()
+      public Class classOfObject()
       {
         return Object.class;
       }
@@ -294,22 +294,17 @@ public class IncrementalIndex implements Iterable<Row>
     public DimensionSelector makeDimensionSelector(final String dimension)
     {
       final String dimensionName = dimension.toLowerCase();
-      final List<String> dimensionValues = in.getDimension(dimensionName);
-      if (dimensionValues == null) {
-        return null;
-      }
-      final IncrementalIndex.DimDim dimValLookup = getDimension(dimensionName);
-      final int maxId = dimValLookup.size();
       return new DimensionSelector()
       {
         @Override
         public IndexedInts getRow()
         {
+          final List<String> dimensionValues = in.getDimension(dimensionName);
           final ArrayList<Integer> vals = Lists.newArrayList();
-          for (String dimVal : dimensionValues) {
-            int id = dimValLookup.getId(dimVal);
-            vals.add(id);
+          if (dimensionValues != null) {
+            for (int i = 0; i < dimensionValues.size(); ++i) {
+              vals.add(i);
+            }
           }
           return new IndexedInts()
@@ -337,19 +332,19 @@ public class IncrementalIndex implements Iterable<Row>
         @Override
         public int getValueCardinality()
         {
-          return maxId;
+          throw new UnsupportedOperationException("value cardinality is unknown in incremental index");
         }

         @Override
         public String lookupName(int id)
         {
-          return dimValLookup.getValue(id);
+          return in.getDimension(dimensionName).get(id);
         }

         @Override
         public int lookupId(String name)
         {
-          return dimValLookup.getId(name);
+          return in.getDimension(dimensionName).indexOf(name);
         }
       };
     }

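Side note: after this change the selector no longer goes through a global DimDim lookup; an id is simply the position of a value within the current row, and the overall value cardinality is therefore unknown. A rough, self-contained sketch of that row-local behavior (class and method names are illustrative, not Druid APIs):

import java.util.Arrays;
import java.util.List;

// Stand-alone sketch of the row-local id scheme used above; not Druid code.
public class RowLocalDimensionSketch
{
  private final List<String> rowValues;

  RowLocalDimensionSketch(List<String> rowValues)
  {
    this.rowValues = rowValues;
  }

  // Mirrors getRow(): ids are just positions 0..n-1 within the current row.
  int[] getRow()
  {
    int[] ids = new int[rowValues.size()];
    for (int i = 0; i < ids.length; i++) {
      ids[i] = i;
    }
    return ids;
  }

  // Mirrors lookupName(): resolve an id against the row's own value list.
  String lookupName(int id)
  {
    return rowValues.get(id);
  }

  // Mirrors lookupId(): a linear search within the row.
  int lookupId(String name)
  {
    return rowValues.indexOf(name);
  }

  // Mirrors getValueCardinality(): there is no global dictionary to size.
  int getValueCardinality()
  {
    throw new UnsupportedOperationException("value cardinality is unknown in incremental index");
  }

  public static void main(String[] args)
  {
    RowLocalDimensionSketch sketch = new RowLocalDimensionSketch(Arrays.asList("a", "b", "c"));
    System.out.println(Arrays.toString(sketch.getRow())); // [0, 1, 2]
    System.out.println(sketch.lookupName(1));             // b
    System.out.println(sketch.lookupId("c"));             // 2
  }
}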
View File

@@ -28,6 +28,7 @@ import io.druid.query.aggregation.CountAggregatorFactory;
 import io.druid.query.aggregation.DoubleSumAggregatorFactory;
 import io.druid.query.aggregation.JavaScriptAggregatorFactory;
 import io.druid.query.aggregation.LongSumAggregatorFactory;
+import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
 import io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator;
 import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
 import io.druid.query.aggregation.post.ArithmeticPostAggregator;
@@ -110,6 +111,11 @@ public class QueryRunnerTestHelper
       "uniques",
       "quality_uniques"
   );
+  public static final CardinalityAggregatorFactory qualityCardinality = new CardinalityAggregatorFactory(
+      "cardinality",
+      Arrays.asList("quality"),
+      false
+  );
   public static final ConstantPostAggregator constant = new ConstantPostAggregator("const", 1L, null);
   public static final FieldAccessPostAggregator rowsPostAgg = new FieldAccessPostAggregator("rows", "rows");
   public static final FieldAccessPostAggregator indexPostAgg = new FieldAccessPostAggregator("index", "index");

View File

@@ -217,6 +217,36 @@ public class GroupByQueryRunnerTest
     TestHelper.assertExpectedObjects(expectedResults, results, "");
   }

+  @Test
+  public void testGroupByWithCardinality()
+  {
+    GroupByQuery query = GroupByQuery
+        .builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setAggregatorSpecs(
+            Arrays.<AggregatorFactory>asList(
+                QueryRunnerTestHelper.rowsCount,
+                QueryRunnerTestHelper.qualityCardinality
+            )
+        )
+        .setGranularity(QueryRunnerTestHelper.allGran)
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        createExpectedRow(
+            "2011-04-01",
+            "rows",
+            26L,
+            "cardinality",
+            QueryRunnerTestHelper.UNIQUES_9
+        )
+    );
+
+    Iterable<Row> results = runQuery(query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
   @Test
   public void testGroupByWithDimExtractionFn()
   {
@@ -1209,7 +1239,7 @@ public class GroupByQueryRunnerTest
   }

   @Test
-  public void testSubqueryWithEverything()
+  public void testSubqueryWithMultiColumnAggregators()
   {
     final GroupByQuery subquery = GroupByQuery
         .builder()
@@ -1220,11 +1250,11 @@ public class GroupByQueryRunnerTest
         .setAggregatorSpecs(
             Arrays.asList(
                 QueryRunnerTestHelper.rowsCount,
-                new LongSumAggregatorFactory("idx_subagg", "index"),
+                new DoubleSumAggregatorFactory("idx_subagg", "index"),
                 new JavaScriptAggregatorFactory(
                     "js_agg",
                     Arrays.asList("index", "provider"),
-                    "function(index, dim){return index + dim.length;}",
+                    "function(current, index, dim){return current + index + dim.length;}",
                     "function(){return 0;}",
                     "function(a,b){return a + b;}"
                 )
@@ -1295,23 +1325,11 @@ public class GroupByQueryRunnerTest
         .build();

     List<Row> expectedResults = Arrays.asList(
-        createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 11135.0),
-        createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 11118.0),
-        createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 11158.0),
-        createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 11120.0),
-        createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 11121.0),
-        createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 11078.0),
-        createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 11119.0),
-        createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 11147.0),
-        createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 11112.0),
-        createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 11166.0),
-        createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 11113.0),
-        createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 13447.0),
-        createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 11114.0),
-        createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 13505.0),
-        createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 11097.0),
-        createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 11126.0)
+        createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 11119.0, "js_outer_agg", 123.92274475097656),
+        createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 11078.0, "js_outer_agg", 82.62254333496094),
+        createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 11121.0, "js_outer_agg", 125.58358001708984),
+        createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 11120.0, "js_outer_agg", 124.13470458984375),
+        createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 11158.0, "js_outer_agg", 162.74722290039062)
     );

     // Subqueries are handled by the ToolChest