diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 47a77d5664b..c3c4cb0f813 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -133,6 +133,24 @@ Search filters can be used to filter on partial string matches. |dimension|The dimension to perform the search over.|yes| |query|A JSON object for the type of search. See below for more information.|yes| +### In filter + +The in filter can be used to express the following SQL query: + +```sql + SELECT COUNT(*) AS 'Count' FROM `table` WHERE `outlaw` IN ('Good', 'Bad', 'Ugly') +``` + +The grammar for an in filter is as follows: + +```json +{ + "type": "in", + "dimension": "outlaw", + "values": ["Good", "Bad", "Ugly"] +} +``` + #### Search Query Spec ##### Insensitive Contains diff --git a/processing/src/main/java/io/druid/query/filter/DimFilter.java b/processing/src/main/java/io/druid/query/filter/DimFilter.java index 15d32b28a43..565c8fc06a4 100644 --- a/processing/src/main/java/io/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/DimFilter.java @@ -32,7 +32,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; @JsonSubTypes.Type(name="regex", value=RegexDimFilter.class), @JsonSubTypes.Type(name="search", value=SearchQueryDimFilter.class), @JsonSubTypes.Type(name="javascript", value=JavaScriptDimFilter.class), - @JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class) + @JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class), + @JsonSubTypes.Type(name="in", value=InDimFilter.class) }) public interface DimFilter { diff --git a/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java b/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java index 1c7e9f8032f..abd96edb6b4 100644 --- a/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java +++ b/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java @@ -34,6 +34,7 @@ class 
DimFilterCacheHelper static final byte SEARCH_QUERY_TYPE_ID = 0x6; static final byte JAVASCRIPT_CACHE_ID = 0x7; static final byte SPATIAL_CACHE_ID = 0x8; + static final byte IN_CACHE_ID = 0x9; static byte[] computeCacheKey(byte cacheIdKey, List filters) { diff --git a/processing/src/main/java/io/druid/query/filter/InDimFilter.java b/processing/src/main/java/io/druid/query/filter/InDimFilter.java new file mode 100644 index 00000000000..78aade4bf53 --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/InDimFilter.java @@ -0,0 +1,87 @@ +package io.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.metamx.common.StringUtils; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; + +public class InDimFilter implements DimFilter +{ + private final List values; + private final String dimension; + + @JsonCreator + public InDimFilter(@JsonProperty("dimension") String dimension, @JsonProperty("values") List values) + { + Preconditions.checkNotNull(dimension, "dimension can not be null"); + this.values = (values == null) ? 
Collections.emptyList() : values; + this.dimension = dimension; + } + + @JsonProperty + public String getDimension() + { + return dimension; + } + + @JsonProperty + public List getValues() + { + return values; + } + + @Override + public byte[] getCacheKey() + { + byte[] dimensionBytes = StringUtils.toUtf8(dimension); + final byte[][] valuesBytes = new byte[values.size()][]; + int valuesBytesSize = 0; + int index = 0; + for (String value : values) { + valuesBytes[index] = StringUtils.toUtf8(value); + valuesBytesSize += valuesBytes[index].length + 1; + ++index; + } + + ByteBuffer filterCacheKey = ByteBuffer.allocate(2 + dimensionBytes.length + valuesBytesSize) + .put(DimFilterCacheHelper.IN_CACHE_ID) + .put(dimensionBytes) + .put((byte) 0xFF); + for (byte [] bytes: valuesBytes) { + filterCacheKey.put(bytes) + .put((byte) 0xFF); + } + return filterCacheKey.array(); + } + + @Override + public int hashCode() + { + int result = getValues().hashCode(); + result = 31 * result + getDimension().hashCode(); + return result; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (!(o instanceof InDimFilter)) { + return false; + } + + InDimFilter that = (InDimFilter) o; + + if (!values.equals(that.values)) { + return false; + } + return dimension.equals(that.dimension); + + } +} diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index ee93583d9e3..7931f24b290 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -23,6 +23,7 @@ import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.ExtractionDimFilter; import io.druid.query.filter.Filter; +import io.druid.query.filter.InDimFilter; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.NotDimFilter; import 
io.druid.query.filter.OrDimFilter; @@ -93,6 +94,21 @@ public class Filters final SpatialDimFilter spatialDimFilter = (SpatialDimFilter) dimFilter; filter = new SpatialFilter(spatialDimFilter.getDimension(), spatialDimFilter.getBound()); + } else if (dimFilter instanceof InDimFilter) { + final InDimFilter inDimFilter = (InDimFilter) dimFilter; + final List listFilters = Lists.transform( + inDimFilter.getValues(), new Function() + { + @Nullable + @Override + public Filter apply(@Nullable String input) + { + return new SelectorFilter(inDimFilter.getDimension(), input); + } + } + ); + + filter = new OrFilter(listFilters); } return filter; diff --git a/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java b/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java new file mode 100644 index 00000000000..09eeb893183 --- /dev/null +++ b/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java @@ -0,0 +1,50 @@ +package io.druid.query.filter; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.inject.Injector; +import com.google.inject.Key; +import io.druid.guice.GuiceInjectors; +import io.druid.guice.annotations.Json; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; + +public class InDimFilterSerDesrTest +{ + private static ObjectMapper mapper; + + private final String actualInFilter = "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"good\",\"bad\"]}"; + @Before + public void setUp() + { + Injector defaultInjector = GuiceInjectors.makeStartupInjector(); + mapper = defaultInjector.getInstance(Key.get(ObjectMapper.class, Json.class)); + } + + @Test + public void testDeserialization() throws IOException + { + final InDimFilter actualInDimFilter = mapper.reader(DimFilter.class).readValue(actualInFilter); + final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad")); + 
Assert.assertEquals(expectedInDimFilter, actualInDimFilter); + } + + @Test + public void testSerialization() throws IOException + { + final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad")); + final String expectedInFilter = mapper.writeValueAsString(dimInFilter); + Assert.assertEquals(expectedInFilter, actualInFilter); + } + + @Test + public void testGetCacheKey() + { + final InDimFilter inDimFilter_1 = new InDimFilter("dimTest", Arrays.asList("good", "bad")); + final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Arrays.asList("good,bad")); + Assert.assertNotEquals(inDimFilter_1.getCacheKey(), inDimFilter_2.getCacheKey()); + } +} diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 99e8483e7c4..bfc868b4ea3 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -37,6 +37,7 @@ import io.druid.query.aggregation.DoubleMinAggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; import io.druid.query.filter.NotDimFilter; import io.druid.query.filter.RegexDimFilter; import io.druid.query.filter.SelectorDimFilter; @@ -53,7 +54,6 @@ import org.junit.runners.Parameterized; import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -1185,6 +1185,66 @@ public class TimeseriesQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, results); } + + @Test + public void testTimeseriesWithInFilter() + { + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + 
.granularity(QueryRunnerTestHelper.dayGran) + .filters( + new InDimFilter( + QueryRunnerTestHelper.marketDimension, Arrays.asList( + "spot", + "upfront", + "total_market", + "billyblank" + ) + ) + ) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.indexLongSum, + QueryRunnerTestHelper.qualityUniques + ) + ) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-04-01"), + new TimeseriesResultValue( + ImmutableMap.of( + "rows", 13L, + "index", 6619L, + "addRowsIndexConstant", 6633.0, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ), + new Result<>( + new DateTime("2011-04-02"), + new TimeseriesResultValue( + ImmutableMap.of( + "rows", 13L, + "index", 5827L, + "addRowsIndexConstant", 5841.0, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ) + ); + + Iterable> results = Sequences.toList( + runner.run(query, CONTEXT), + Lists.>newArrayList() + ); + TestHelper.assertExpectedResults(expectedResults, results); + } + @Test public void testTimeseriesWithNonExistentFilterAndMultiDimAndOr() { @@ -1295,14 +1355,14 @@ public class TimeseriesQueryRunnerTest public void testTimeseriesWithFilterOnNonExistentDimensionSkipBuckets() { TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() - .dataSource(QueryRunnerTestHelper.dataSource) - .granularity(QueryRunnerTestHelper.dayGran) - .filters("bobby", "billy") - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators(QueryRunnerTestHelper.commonAggregators) - .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) - .context(ImmutableMap.of("skipEmptyBuckets", "true")) - .build(); + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.dayGran) + .filters("bobby", "billy") + .intervals(QueryRunnerTestHelper.firstToThird) + 
.aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .context(ImmutableMap.of("skipEmptyBuckets", "true")) + .build(); List> expectedResults = Arrays.asList(); @@ -1317,13 +1377,13 @@ public class TimeseriesQueryRunnerTest public void testTimeseriesWithNullFilterOnNonExistentDimension() { TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() - .dataSource(QueryRunnerTestHelper.dataSource) - .granularity(QueryRunnerTestHelper.dayGran) - .filters("bobby", null) - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators(QueryRunnerTestHelper.commonAggregators) - .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) - .build(); + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.dayGran) + .filters("bobby", null) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); List> expectedResults = Arrays.asList( new Result<>( @@ -1361,13 +1421,13 @@ public class TimeseriesQueryRunnerTest public void testTimeseriesWithInvertedFilterOnNonExistentDimension() { TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() - .dataSource(QueryRunnerTestHelper.dataSource) - .granularity(QueryRunnerTestHelper.dayGran) - .filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally"))) - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators(QueryRunnerTestHelper.commonAggregators) - .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) - .build(); + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.dayGran) + .filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally"))) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + 
.postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); List> expectedResults = Arrays.asList( new Result<>( @@ -2025,17 +2085,17 @@ public class TimeseriesQueryRunnerTest public void testTimeseriesWithTimeColumn() { TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() - .dataSource(QueryRunnerTestHelper.dataSource) - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators( - Arrays.asList( - QueryRunnerTestHelper.rowsCount, - QueryRunnerTestHelper.jsCountIfTimeGreaterThan, - QueryRunnerTestHelper.__timeLongSum - ) - ) - .granularity(QueryRunnerTestHelper.allGran) - .build(); + .dataSource(QueryRunnerTestHelper.dataSource) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + QueryRunnerTestHelper.jsCountIfTimeGreaterThan, + QueryRunnerTestHelper.__timeLongSum + ) + ) + .granularity(QueryRunnerTestHelper.allGran) + .build(); List> expectedResults = Arrays.asList( new Result<>(