Add DimFilterHavingSpec. (#3727)

* Add DimFilterHavingSpec.

* Add test for DimFilterHavingSpec with extractionFns.
Gian Merlino 2016-12-02 10:04:30 -08:00 committed by Fangjin Yang
parent a8069f2441
commit 4c5d10f8a3
4 changed files with 232 additions and 1 deletion

View File

@@ -8,6 +8,37 @@ It is essentially the equivalent of the HAVING clause in SQL.
Druid supports the following types of having clauses.
### Query filters
Query filter HavingSpecs allow all [Druid query filters](filters.html) to be used in the Having part of the query.
The grammar for a query filter HavingSpec is:
```json
{
"type" : "filter",
"filter" : <any Druid query filter>
}
```
For example, to use a selector filter:
```json
{
"type" : "filter",
"filter" : {
"type": "selector",
"dimension" : "<dimension>",
"value" : "<dimension_value>"
}
}
```
Note that the "filter" HavingSpec behaves differently from most other HavingSpecs when it comes to an output
field named "\_\_time": it acts on each row's timestamp rather than on that output field. We recommend not
naming output fields "\_\_time"; future versions of Druid may enforce this.
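As an illustration, mirroring the tests added in this commit, a selector filter on "\_\_time" compares against each row's timestamp in milliseconds (the value below corresponds to 2011-04-01T00:00:00Z and is purely illustrative):
```json
{
  "type" : "filter",
  "filter" : {
    "type" : "selector",
    "dimension" : "__time",
    "value" : "1301616000000"
  }
}
```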
### Numeric filters
The simplest having clause is a numeric filter.

View File

@@ -0,0 +1,107 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.groupby.having;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import io.druid.data.input.Row;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.groupby.RowBasedValueMatcherFactory;

import java.nio.ByteBuffer;

public class DimFilterHavingSpec implements HavingSpec
{
private static final byte CACHE_KEY = (byte) 0x9;
private final DimFilter dimFilter;
private final RowBasedValueMatcherFactory valueMatcherFactory;
private final ValueMatcher valueMatcher;
@JsonCreator
public DimFilterHavingSpec(
@JsonProperty("filter") final DimFilter dimFilter
)
{
this.dimFilter = Preconditions.checkNotNull(dimFilter, "filter");
this.valueMatcherFactory = new RowBasedValueMatcherFactory();
this.valueMatcher = dimFilter.toFilter().makeMatcher(valueMatcherFactory);
}
@JsonProperty("filter")
public DimFilter getDimFilter()
{
return dimFilter;
}
@Override
public boolean eval(final Row row)
{
// Not thread safe, but it doesn't have to be.
valueMatcherFactory.setRow(row);
try {
return valueMatcher.matches();
}
finally {
valueMatcherFactory.setRow(null);
}
}
@Override
public byte[] getCacheKey()
{
final byte[] filterBytes = dimFilter.getCacheKey();
return ByteBuffer.allocate(1 + filterBytes.length)
.put(CACHE_KEY)
.put(filterBytes)
.array();
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
DimFilterHavingSpec that = (DimFilterHavingSpec) o;
return dimFilter.equals(that.dimFilter);
}
@Override
public int hashCode()
{
return dimFilter.hashCode();
}
@Override
public String toString()
{
return "DimFilterHavingSpec{" +
"dimFilter=" + dimFilter +
'}';
}
}
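For context, a minimal usage sketch (not part of the diff) of how the new spec evaluates a single row. The class and constructor signatures come from the code above and from io.druid.data.input.MapBasedRow; the dimension names, values, and the wrapper class are illustrative.

```java
// Usage sketch only: evaluate DimFilterHavingSpec against a single row.
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedRow;
import io.druid.data.input.Row;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.groupby.having.DimFilterHavingSpec;
import org.joda.time.DateTime;

public class DimFilterHavingSpecSketch
{
  public static void main(String[] args)
  {
    // Any Druid DimFilter can back the having spec; here a simple selector.
    final DimFilterHavingSpec havingSpec = new DimFilterHavingSpec(
        new SelectorDimFilter("alias", "business", null)
    );

    // A groupBy result row with one dimension and one aggregate.
    final Row row = new MapBasedRow(
        new DateTime("2011-04-01"),
        ImmutableMap.<String, Object>of("alias", "business", "rows", 2L)
    );

    // eval() points the shared RowBasedValueMatcherFactory at the row, runs the
    // pre-built matcher, and clears the row again, which is why the class notes
    // that it is not thread safe.
    System.out.println(havingSpec.eval(row)); // true
  }
}
```

Per the constructor, the DimFilter is converted to a ValueMatcher once when the spec is created, so the same matcher is reused for every row evaluated by that instance.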

View File

@@ -36,7 +36,8 @@ import io.druid.data.input.Row;
@JsonSubTypes.Type(name = "lessThan", value = LessThanHavingSpec.class),
@JsonSubTypes.Type(name = "equalTo", value = EqualToHavingSpec.class),
@JsonSubTypes.Type(name = "dimSelector", value = DimensionSelectorHavingSpec.class),
@JsonSubTypes.Type(name = "always", value = AlwaysHavingSpec.class)
@JsonSubTypes.Type(name = "always", value = AlwaysHavingSpec.class),
@JsonSubTypes.Type(name = "filter", value = DimFilterHavingSpec.class)
})
public interface HavingSpec
{
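The new "filter" entry above is what lets Jackson resolve a JSON having clause onto the new class. A minimal sketch of that round trip (not part of the diff), assuming Druid's io.druid.jackson.DefaultObjectMapper and a hand-written JSON literal:

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.groupby.having.DimFilterHavingSpec;
import io.druid.query.groupby.having.HavingSpec;

public class HavingSpecJsonSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new DefaultObjectMapper();

    final String json = "{\n"
                        + "  \"type\" : \"filter\",\n"
                        + "  \"filter\" : {\n"
                        + "    \"type\" : \"selector\",\n"
                        + "    \"dimension\" : \"alias\",\n"
                        + "    \"value\" : \"business\"\n"
                        + "  }\n"
                        + "}";

    // The "type" field selects the subtype via the @JsonSubTypes registration above;
    // the nested filter object deserializes through Druid's DimFilter subtype registry.
    final HavingSpec havingSpec = mapper.readValue(json, HavingSpec.class);
    System.out.println(havingSpec instanceof DimFilterHavingSpec); // true
  }
}
```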

View File

@@ -87,6 +87,7 @@ import io.druid.query.filter.OrDimFilter;
import io.druid.query.filter.RegexDimFilter;
import io.druid.query.filter.SearchQueryDimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.groupby.having.DimFilterHavingSpec;
import io.druid.query.groupby.having.DimensionSelectorHavingSpec;
import io.druid.query.groupby.having.EqualToHavingSpec;
import io.druid.query.groupby.having.GreaterThanHavingSpec;
@@ -3262,6 +3263,97 @@ public class GroupByQueryRunnerTest
);
}
@Test
public void testDimFilterHavingSpec()
{
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L)
);
final DimFilterHavingSpec havingSpec = new DimFilterHavingSpec(
new AndDimFilter(
ImmutableList.of(
new OrDimFilter(
ImmutableList.of(
new BoundDimFilter("rows", "2", null, true, false, null, null, StringComparators.NUMERIC),
new SelectorDimFilter("idx", "217", null)
)
),
new SelectorDimFilter("__time", String.valueOf(new DateTime("2011-04-01").getMillis()), null)
)
)
);
GroupByQuery.Builder builder = GroupByQuery
.builder()
.setDataSource(QueryRunnerTestHelper.dataSource)
.setInterval("2011-04-02/2011-04-04")
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
.setHavingSpec(havingSpec);
final GroupByQuery fullQuery = builder.build();
TestHelper.assertExpectedObjects(
expectedResults,
GroupByQueryRunnerTestHelper.runQuery(factory, runner, fullQuery),
""
);
}
@Test
public void testDimFilterHavingSpecWithExtractionFns()
{
String extractionJsFn = "function(str) { return 'super-' + str; }";
ExtractionFn extractionFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getDefault());
String extractionJsFn2 = "function(num) { return num + 10; }";
ExtractionFn extractionFn2 = new JavaScriptExtractionFn(extractionJsFn2, false, JavaScriptConfig.getDefault());
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L)
);
final DimFilterHavingSpec havingSpec = new DimFilterHavingSpec(
new OrDimFilter(
ImmutableList.of(
new BoundDimFilter("rows", "12", null, true, false, null, extractionFn2, StringComparators.NUMERIC),
new SelectorDimFilter("idx", "super-217", extractionFn)
)
)
);
GroupByQuery.Builder builder = GroupByQuery
.builder()
.setDataSource(QueryRunnerTestHelper.dataSource)
.setInterval("2011-04-02/2011-04-04")
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
.setHavingSpec(havingSpec);
final GroupByQuery fullQuery = builder.build();
TestHelper.assertExpectedObjects(
expectedResults,
GroupByQueryRunnerTestHelper.runQuery(factory, runner, fullQuery),
""
);
}
@Test
public void testMergedHavingSpec()
{