Merge pull request #1912 from b-slim/in_filter

Adding in filter
This commit is contained in:
Charles Allen 2015-11-06 14:47:47 -08:00
commit 61139b9dfa
7 changed files with 268 additions and 35 deletions

View File

@ -133,6 +133,24 @@ Search filters can be used to filter on partial string matches.
|dimension|The dimension to perform the search over.|yes|
|query|A JSON object for the type of search. See below for more information.|yes|
### In filter
The IN filter can be used to express the following SQL query:
```sql
SELECT COUNT(*) AS 'Count' FROM `table` WHERE `outlaw` IN ('Good', 'Bad', 'Ugly')
```
The grammar for an IN filter is as follows:
```json
{
"type": "in",
"dimension": "outlaw",
"values": ["Good", "Bad", "Ugly"]
}
```
#### Search Query Spec
##### Insensitive Contains

View File

@ -32,7 +32,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name="regex", value=RegexDimFilter.class),
@JsonSubTypes.Type(name="search", value=SearchQueryDimFilter.class),
@JsonSubTypes.Type(name="javascript", value=JavaScriptDimFilter.class),
@JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class)
@JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class),
@JsonSubTypes.Type(name="in", value=InDimFilter.class)
})
public interface DimFilter
{

View File

@ -34,6 +34,7 @@ class DimFilterCacheHelper
static final byte SEARCH_QUERY_TYPE_ID = 0x6;
static final byte JAVASCRIPT_CACHE_ID = 0x7;
static final byte SPATIAL_CACHE_ID = 0x8;
static final byte IN_CACHE_ID = 0x9;
static byte[] computeCacheKey(byte cacheIdKey, List<DimFilter> filters)
{

View File

@ -0,0 +1,87 @@
package io.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.metamx.common.StringUtils;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * A {@link DimFilter} that matches rows whose value for {@code dimension} equals any one of
 * {@code values} — the equivalent of a SQL {@code IN (...)} clause.
 *
 * Instances are immutable: the constructor takes a defensive copy of the supplied list so that
 * later mutation by the caller cannot change this filter's {@code equals}/{@code hashCode}
 * or cache key.
 */
public class InDimFilter implements DimFilter
{
  private final List<String> values;
  private final String dimension;

  /**
   * @param dimension the dimension to filter on; must not be null
   * @param values    the accepted values; a null list is treated as empty, but individual
   *                  elements must not be null (they would NPE during cache-key computation)
   */
  @JsonCreator
  public InDimFilter(@JsonProperty("dimension") String dimension, @JsonProperty("values") List<String> values)
  {
    Preconditions.checkNotNull(dimension, "dimension can not be null");
    if (values == null) {
      this.values = Collections.emptyList();
    } else {
      // Fail fast here instead of throwing an obscure NPE later in getCacheKey().
      Preconditions.checkArgument(!values.contains(null), "values can not contain null entries");
      // Defensive, unmodifiable copy: external mutation must not alter this filter's identity.
      this.values = Collections.unmodifiableList(new ArrayList<String>(values));
    }
    this.dimension = dimension;
  }

  @JsonProperty
  public String getDimension()
  {
    return dimension;
  }

  /**
   * @return the accepted values as an unmodifiable list (possibly empty, never null)
   */
  @JsonProperty
  public List<String> getValues()
  {
    return values;
  }

  /**
   * Cache key layout: [IN_CACHE_ID][dimension utf8][0xFF][value utf8][0xFF]...
   * 0xFF is a safe delimiter because it never appears in valid UTF-8 output, so
   * e.g. {"good","bad"} and {"goodbad"} produce distinct keys.
   */
  @Override
  public byte[] getCacheKey()
  {
    byte[] dimensionBytes = StringUtils.toUtf8(dimension);
    final byte[][] valuesBytes = new byte[values.size()][];
    int valuesBytesSize = 0;
    int index = 0;
    for (String value : values) {
      valuesBytes[index] = StringUtils.toUtf8(value);
      valuesBytesSize += valuesBytes[index].length + 1; // +1 for the trailing 0xFF delimiter
      ++index;
    }
    // 2 = cache-type id byte + delimiter byte after the dimension name.
    ByteBuffer filterCacheKey = ByteBuffer.allocate(2 + dimensionBytes.length + valuesBytesSize)
                                          .put(DimFilterCacheHelper.IN_CACHE_ID)
                                          .put(dimensionBytes)
                                          .put((byte) 0xFF);
    for (byte[] bytes : valuesBytes) {
      filterCacheKey.put(bytes)
                    .put((byte) 0xFF);
    }
    return filterCacheKey.array();
  }

  @Override
  public int hashCode()
  {
    // Must stay consistent with equals(): both derive from (values, dimension).
    int result = getValues().hashCode();
    result = 31 * result + getDimension().hashCode();
    return result;
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (!(o instanceof InDimFilter)) {
      return false;
    }

    InDimFilter that = (InDimFilter) o;

    if (!values.equals(that.values)) {
      return false;
    }
    return dimension.equals(that.dimension);
  }
}

View File

@ -23,6 +23,7 @@ import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.Filter;
import io.druid.query.filter.InDimFilter;
import io.druid.query.filter.JavaScriptDimFilter;
import io.druid.query.filter.NotDimFilter;
import io.druid.query.filter.OrDimFilter;
@ -93,6 +94,21 @@ public class Filters
final SpatialDimFilter spatialDimFilter = (SpatialDimFilter) dimFilter;
filter = new SpatialFilter(spatialDimFilter.getDimension(), spatialDimFilter.getBound());
} else if (dimFilter instanceof InDimFilter) {
final InDimFilter inDimFilter = (InDimFilter) dimFilter;
final List<Filter> listFilters = Lists.transform(
inDimFilter.getValues(), new Function<String, Filter>()
{
@Nullable
@Override
public Filter apply(@Nullable String input)
{
return new SelectorFilter(inDimFilter.getDimension(), input);
}
}
);
filter = new OrFilter(listFilters);
}
return filter;

View File

@ -0,0 +1,50 @@
package io.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Injector;
import com.google.inject.Key;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.annotations.Json;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
/**
 * Serialization / deserialization round-trip tests for {@link InDimFilter}.
 */
public class InDimFilterSerDesrTest
{
  private static ObjectMapper mapper;

  // Canonical JSON form of an "in" filter: used both as parser input (deserialization test)
  // and as the expected serializer output (serialization test).
  private final String actualInFilter = "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"good\",\"bad\"]}";

  @Before
  public void setUp()
  {
    Injector defaultInjector = GuiceInjectors.makeStartupInjector();
    mapper = defaultInjector.getInstance(Key.get(ObjectMapper.class, Json.class));
  }

  @Test
  public void testDeserialization() throws IOException
  {
    final InDimFilter actualInDimFilter = mapper.reader(DimFilter.class).readValue(actualInFilter);
    final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"));
    Assert.assertEquals(expectedInDimFilter, actualInDimFilter);
  }

  @Test
  public void testSerialization() throws IOException
  {
    final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"));
    final String serializedFilter = mapper.writeValueAsString(dimInFilter);
    // assertEquals takes (expected, actual): the canonical JSON string is the expectation.
    Assert.assertEquals(actualInFilter, serializedFilter);
  }

  @Test
  public void testGetCacheKey()
  {
    final InDimFilter inDimFilter_1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"));
    final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Arrays.asList("good,bad"));
    // Compare array CONTENTS. Assert.assertNotEquals on two byte[] uses identity equality
    // (arrays don't override equals), so it would pass for ANY two distinct array instances
    // and verify nothing about the cache key.
    Assert.assertFalse(
        Arrays.equals(inDimFilter_1.getCacheKey(), inDimFilter_2.getCacheKey())
    );
  }
}

View File

@ -37,6 +37,7 @@ import io.druid.query.aggregation.DoubleMinAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.InDimFilter;
import io.druid.query.filter.NotDimFilter;
import io.druid.query.filter.RegexDimFilter;
import io.druid.query.filter.SelectorDimFilter;
@ -53,7 +54,6 @@ import org.junit.runners.Parameterized;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -1185,6 +1185,66 @@ public class TimeseriesQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, results);
}
@Test
public void testTimeseriesWithInFilter()
{
  // Keep only rows whose market dimension is one of the listed values;
  // "billyblank" matches nothing, so it must not affect the aggregates.
  final InDimFilter marketInFilter = new InDimFilter(
      QueryRunnerTestHelper.marketDimension,
      Arrays.asList("spot", "upfront", "total_market", "billyblank")
  );

  final TimeseriesQuery inFilteredQuery =
      Druids.newTimeseriesQueryBuilder()
            .dataSource(QueryRunnerTestHelper.dataSource)
            .granularity(QueryRunnerTestHelper.dayGran)
            .filters(marketInFilter)
            .intervals(QueryRunnerTestHelper.firstToThird)
            .aggregators(
                Arrays.<AggregatorFactory>asList(
                    QueryRunnerTestHelper.rowsCount,
                    QueryRunnerTestHelper.indexLongSum,
                    QueryRunnerTestHelper.qualityUniques
                )
            )
            .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
            .build();

  // One expected bucket per day in the queried interval.
  final List<Result<TimeseriesResultValue>> expected = Arrays.asList(
      new Result<>(
          new DateTime("2011-04-01"),
          new TimeseriesResultValue(
              ImmutableMap.<String, Object>of(
                  "rows", 13L,
                  "index", 6619L,
                  "addRowsIndexConstant", 6633.0,
                  "uniques", QueryRunnerTestHelper.UNIQUES_9
              )
          )
      ),
      new Result<>(
          new DateTime("2011-04-02"),
          new TimeseriesResultValue(
              ImmutableMap.<String, Object>of(
                  "rows", 13L,
                  "index", 5827L,
                  "addRowsIndexConstant", 5841.0,
                  "uniques", QueryRunnerTestHelper.UNIQUES_9
              )
          )
      )
  );

  final Iterable<Result<TimeseriesResultValue>> actual = Sequences.toList(
      runner.run(inFilteredQuery, CONTEXT),
      Lists.<Result<TimeseriesResultValue>>newArrayList()
  );
  TestHelper.assertExpectedResults(expected, actual);
}
@Test
public void testTimeseriesWithNonExistentFilterAndMultiDimAndOr()
{
@ -1295,14 +1355,14 @@ public class TimeseriesQueryRunnerTest
public void testTimeseriesWithFilterOnNonExistentDimensionSkipBuckets()
{
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters("bobby", "billy")
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.context(ImmutableMap.<String, Object>of("skipEmptyBuckets", "true"))
.build();
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters("bobby", "billy")
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.context(ImmutableMap.<String, Object>of("skipEmptyBuckets", "true"))
.build();
List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList();
@ -1317,13 +1377,13 @@ public class TimeseriesQueryRunnerTest
public void testTimeseriesWithNullFilterOnNonExistentDimension()
{
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters("bobby", null)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters("bobby", null)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(
new Result<>(
@ -1361,13 +1421,13 @@ public class TimeseriesQueryRunnerTest
public void testTimeseriesWithInvertedFilterOnNonExistentDimension()
{
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally")))
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.dayGran)
.filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally")))
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.build();
List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(
new Result<>(
@ -2025,17 +2085,17 @@ public class TimeseriesQueryRunnerTest
public void testTimeseriesWithTimeColumn()
{
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
QueryRunnerTestHelper.jsCountIfTimeGreaterThan,
QueryRunnerTestHelper.__timeLongSum
)
)
.granularity(QueryRunnerTestHelper.allGran)
.build();
.dataSource(QueryRunnerTestHelper.dataSource)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
QueryRunnerTestHelper.jsCountIfTimeGreaterThan,
QueryRunnerTestHelper.__timeLongSum
)
)
.granularity(QueryRunnerTestHelper.allGran)
.build();
List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(
new Result<>(