From 77afdf25e32fddd1aab282edb206383e694c9efe Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Thu, 10 Dec 2015 08:42:04 -0600 Subject: [PATCH] adding Bound Filter --- docs/content/querying/filters.md | 95 ++++++++++ .../io/druid/query/filter/BoundDimFilter.java | 172 ++++++++++++++++++ .../java/io/druid/query/filter/DimFilter.java | 4 +- .../query/filter/DimFilterCacheHelper.java | 1 + .../io/druid/segment/filter/BoundFilter.java | 70 +++++++ .../java/io/druid/segment/filter/Filters.java | 6 +- .../query/filter/BoundDimFilterTests.java | 77 ++++++++ .../timeseries/TimeseriesQueryRunnerTest.java | 80 ++++++++ 8 files changed, 503 insertions(+), 2 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/filter/BoundDimFilter.java create mode 100644 processing/src/main/java/io/druid/segment/filter/BoundFilter.java create mode 100644 processing/src/test/java/io/druid/query/filter/BoundDimFilterTests.java diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index c3c4cb0f813..629151069f4 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -151,6 +151,101 @@ The grammar for a IN filter is as follows: } ``` +### Bound filter + +The bound filter can be used to filter by comparing dimension values to an upper value and/or a lower value. +By default, comparison is string-based and **case sensitive**. +To use numeric comparison you can set `alphaNumeric` to `true`. +By default the bounds are inclusive (not strict): `inputString <= upper && inputString >= lower`. 
+ +The grammar for a bound filter is as follows: + +```json +{ + "type": "bound", + "dimension": "age", + "lower": "21", + "upper": "31", + "alphaNumeric": true +} +``` +Equivalent to retaining a row if `21 <= age <= 31` + +```json +{ + "type": "bound", + "dimension": "name", + "lower": "foo", + "upper": "hoo" +} +``` + +Equivalent to retaining a row if `foo <= name <= hoo` + +To make a bound strict, set `lowerStrict` and/or `upperStrict` to `true`. + +To have strict bounds: + +```json +{ + "type": "bound", + "dimension": "age", + "lower": "21", + "lowerStrict": true, + "upper": "31", + "upperStrict": true, + "alphaNumeric": true +} +``` +Equivalent to retaining a row if `21 < age < 31` + +To have a strict upper bound: + +```json +{ + "type": "bound", + "dimension": "age", + "lower": "21", + "upper": "31", + "upperStrict": true, + "alphaNumeric": true +} +``` + +Equivalent to retaining a row if `21 <= age < 31` + +To compare against only an upper bound or only a lower bound: + +```json +{ + "type": "bound", + "dimension": "age", + "upper": "31", + "upperStrict": true, + "alphaNumeric": true +} +``` + +Equivalent to retaining a row if `age < 31` + +```json +{ + "type": "bound", + "dimension": "age", + "lower": "18", + "alphaNumeric": true +} +``` + +Equivalent to retaining a row if `18 <= age` + +With the `alphaNumeric` comparator, if the dimension value includes non-digit characters you may see **fuzzy matching**. +If the dimension value starts with a non-digit, the filter will consider it out of range (`value < lowerBound` and `value > upperBound`). +If the dimension value starts with a digit and contains non-digit characters, the comparison is done character by character. 
+For instance, suppose the lower bound is `100` and the value is `10K`: the filter will match (`100 < 10K` returns `true`) since `K` is greater than any digit. +Now suppose that the lower bound is `110`: the filter will not match (`110 < 10K` returns `false`). + + #### Search Query Spec ##### Insensitive Contains diff --git a/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java new file mode 100644 index 00000000000..c50e0e7f743 --- /dev/null +++ b/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java @@ -0,0 +1,172 @@ +/* +* Licensed to Metamarkets Group Inc. (Metamarkets) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. Metamarkets licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+*/
+
+package io.druid.query.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.metamx.common.StringUtils;
+
+import java.nio.ByteBuffer;
+
+/**
+ * JSON-serializable description of a bound filter: a dimension name, an optional {@code lower} and
+ * an optional {@code upper} bound (at least one is required), a strictness flag per bound and an
+ * {@code alphaNumeric} comparison flag. A {@code null} flag is stored as {@code false}.
+ */
+public class BoundDimFilter implements DimFilter
+{
+  private final String dimension;
+  private final String upper;
+  private final String lower;
+  private final boolean lowerStrict;
+  private final boolean upperStrict;
+  private final boolean alphaNumeric;
+
+  @JsonCreator
+  public BoundDimFilter(
+      @JsonProperty("dimension") String dimension,
+      @JsonProperty("lower") String lower,
+      @JsonProperty("upper") String upper,
+      @JsonProperty("lowerStrict") Boolean lowerStrict,
+      @JsonProperty("upperStrict") Boolean upperStrict,
+      @JsonProperty("alphaNumeric") Boolean alphaNumeric
+  )
+  {
+    this.dimension = Preconditions.checkNotNull(dimension, "dimension can not be null");
+    Preconditions.checkState((lower != null) || (upper != null), "lower and upper can not be null at the same time");
+    this.upper = upper;
+    this.lower = lower;
+    // The flags are boxed so that a property left out of the JSON (null) can default to false.
+    this.lowerStrict = lowerStrict != null && lowerStrict;
+    this.upperStrict = upperStrict != null && upperStrict;
+    this.alphaNumeric = alphaNumeric != null && alphaNumeric;
+  }
+
+  @JsonProperty
+  public String getDimension()
+  {
+    return dimension;
+  }
+
+  @JsonProperty
+  public String getUpper()
+  {
+    return upper;
+  }
+
+  @JsonProperty
+  public String getLower()
+  {
+    return lower;
+  }
+
+  @JsonProperty
+  public boolean isLowerStrict()
+  {
+    return lowerStrict;
+  }
+
+  @JsonProperty
+  public boolean isUpperStrict()
+  {
+    return upperStrict;
+  }
+
+  @JsonProperty
+  public boolean isAlphaNumeric()
+  {
+    return alphaNumeric;
+  }
+
+  /**
+   * Key layout: cache id, bound type (0x1 both bounds, 0x2 upper only, 0x3 lower only), upperStrict,
+   * lowerStrict, alphaNumeric, then the dimension, upper and lower bytes, each preceded by a separator.
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    byte[] dimensionBytes = StringUtils.toUtf8(this.getDimension());
+    byte[] lowerBytes = this.getLower() == null ? new byte[0] : StringUtils.toUtf8(this.getLower());
+    byte[] upperBytes = this.getUpper() == null ? new byte[0] : StringUtils.toUtf8(this.getUpper());
+    // The bound type records which bounds are present, since a missing bound contributes no bytes.
+    byte boundType = 0x1;
+    if (this.getLower() == null) {
+      boundType = 0x2;
+    } else if (this.getUpper() == null) {
+      boundType = 0x3;
+    }
+
+    byte lowerStrictByte = (byte) (this.isLowerStrict() ? 1 : 0);
+    byte upperStrictByte = (byte) (this.isUpperStrict() ? 1 : 0);
+    byte alphaNumericByte = (byte) (this.isAlphaNumeric() ? 1 : 0);
+
+    // 8 fixed bytes: cache id, bound type, three flag bytes and three separators.
+    ByteBuffer boundCacheBuffer = ByteBuffer.allocate(
+        8
+        + dimensionBytes.length
+        + upperBytes.length
+        + lowerBytes.length
+    );
+    boundCacheBuffer.put(DimFilterCacheHelper.BOUND_CACHE_ID)
+                    .put(boundType)
+                    .put(upperStrictByte)
+                    .put(lowerStrictByte)
+                    .put(alphaNumericByte)
+                    .put(DimFilterCacheHelper.STRING_SEPARATOR)
+                    .put(dimensionBytes)
+                    .put(DimFilterCacheHelper.STRING_SEPARATOR)
+                    .put(upperBytes)
+                    .put(DimFilterCacheHelper.STRING_SEPARATOR)
+                    .put(lowerBytes);
+    return boundCacheBuffer.array();
+  }
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof BoundDimFilter)) {
+      return false;
+    }
+
+    BoundDimFilter that = (BoundDimFilter) o;
+    return isLowerStrict() == that.isLowerStrict()
+           && isUpperStrict() == that.isUpperStrict()
+           && isAlphaNumeric() == that.isAlphaNumeric()
+           && getDimension().equals(that.getDimension())
+           && (getUpper() != null ? getUpper().equals(that.getUpper()) : that.getUpper() == null)
+           && (getLower() != null ? getLower().equals(that.getLower()) : that.getLower() == null);
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int result = getDimension().hashCode();
+    result = 31 * result + (getUpper() != null ? getUpper().hashCode() : 0);
+    result = 31 * result + (getLower() != null ? getLower().hashCode() : 0);
+    result = 31 * result + (isLowerStrict() ? 1 : 0);
+    result = 31 * result + (isUpperStrict() ? 1 : 0);
+    result = 31 * result + (isAlphaNumeric() ? 1 : 0);
+    return result;
+  }
+}
diff --git a/processing/src/main/java/io/druid/query/filter/DimFilter.java b/processing/src/main/java/io/druid/query/filter/DimFilter.java
index 8d96bfcab27..a2bae192d41 100644
--- a/processing/src/main/java/io/druid/query/filter/DimFilter.java
+++ b/processing/src/main/java/io/druid/query/filter/DimFilter.java
@@ -35,7 +35,9 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
     @JsonSubTypes.Type(name="search", value=SearchQueryDimFilter.class),
     @JsonSubTypes.Type(name="javascript", value=JavaScriptDimFilter.class),
     @JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class),
-    @JsonSubTypes.Type(name="in", value=InDimFilter.class)
+    @JsonSubTypes.Type(name="in", value=InDimFilter.class),
+    @JsonSubTypes.Type(name="bound", value=BoundDimFilter.class)
+
 })
 public interface DimFilter
 {
diff --git a/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java b/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java
index 1724a85ceb2..d6e970d5442 100644
--- a/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java
+++ b/processing/src/main/java/io/druid/query/filter/DimFilterCacheHelper.java
@@ -38,6 +38,7 @@ class DimFilterCacheHelper
   static final byte SPATIAL_CACHE_ID = 0x8;
   static final byte IN_CACHE_ID = 0x9;
   static final byte STRING_SEPARATOR = (byte) 0xFF;
+  static final byte BOUND_CACHE_ID = 0xA;
 
   static byte[] computeCacheKey(byte cacheIdKey, List filters)
   {
diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java
new file mode 100644
index 00000000000..b91806b684e
--- /dev/null
+++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java
@@ -0,0 +1,70 @@
+/*
+* Licensed to Metamarkets Group Inc. 
(Metamarkets) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. Metamarkets licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+package io.druid.segment.filter;
+
+import com.google.common.base.Predicate;
+import io.druid.query.filter.BoundDimFilter;
+import io.druid.query.topn.AlphaNumericTopNMetricSpec;
+import io.druid.query.topn.LexicographicTopNMetricSpec;
+
+import java.util.Comparator;
+
+/**
+ * Predicate filter that accepts a dimension value when it falls inside the bounds described by a
+ * {@link BoundDimFilter}. Null values never match; a missing bound leaves that side unconstrained.
+ */
+public class BoundFilter extends DimensionPredicateFilter
+{
+  public BoundFilter(final BoundDimFilter boundDimFilter)
+  {
+    super(
+        boundDimFilter.getDimension(), new Predicate<String>()
+        {
+          // Chosen once per filter instead of being rebuilt on every apply() call.
+          private final Comparator comparator = boundDimFilter.isAlphaNumeric()
+              ? new AlphaNumericTopNMetricSpec(null).getComparator(null, null)
+              : new LexicographicTopNMetricSpec(null).getComparator(null, null);
+
+          @Override
+          public boolean apply(String input)
+          {
+            if (input == null) {
+              return false;
+            }
+            // A missing bound keeps the default of 1, which passes both the strict and inclusive test.
+            int lowerComparing = 1;
+            int upperComparing = 1;
+            if (boundDimFilter.getLower() != null) {
+              lowerComparing = comparator.compare(input, boundDimFilter.getLower());
+            }
+            if (boundDimFilter.getUpper() != null) {
+              upperComparing = comparator.compare(boundDimFilter.getUpper(), input);
+            }
+            // A strict bound rejects equality (comparison result 0); an inclusive bound accepts it.
+            final boolean aboveLower =
+                boundDimFilter.isLowerStrict() ? lowerComparing > 0 : lowerComparing >= 0;
+            final boolean belowUpper =
+                boundDimFilter.isUpperStrict() ? upperComparing > 0 : upperComparing >= 0;
+            return aboveLower && belowUpper;
+          }
+        }
+    );
+  }
+}
diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java
index 251c8506498..b9b4910deaf 100644
--- a/processing/src/main/java/io/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/io/druid/segment/filter/Filters.java
@@ -22,6 +22,7 @@ package io.druid.segment.filter;
 import com.google.common.base.Function;
 import com.google.common.collect.Lists;
 import io.druid.query.filter.AndDimFilter;
+import io.druid.query.filter.BoundDimFilter;
 import io.druid.query.filter.DimFilter;
 import io.druid.query.filter.ExtractionDimFilter;
 import io.druid.query.filter.Filter;
@@ -41,7 +42,8 @@ import java.util.List;
  */
 public class Filters
 {
-  public static List convertDimensionFilters(List filters){
+  public static List convertDimensionFilters(List filters)
+  {
     return Lists.transform(
         filters,
         new Function()
@@ -111,6 +113,8 @@ public class Filters
       );
 
       filter = new OrFilter(listFilters);
+    } else if (dimFilter instanceof BoundDimFilter) {
+      filter = new BoundFilter((BoundDimFilter) dimFilter);
     }
 
     return filter;
diff --git a/processing/src/test/java/io/druid/query/filter/BoundDimFilterTests.java b/processing/src/test/java/io/druid/query/filter/BoundDimFilterTests.java
new file mode 100644
index 00000000000..9cc3a6c94bb
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/filter/BoundDimFilterTests.java
@@ -0,0 +1,77 @@
+
+/*
+* Licensed to Metamarkets Group Inc. (Metamarkets) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. 
Metamarkets licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+package io.druid.query.filter;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import com.google.inject.Injector;
+import com.google.inject.Key;
+import io.druid.guice.GuiceInjectors;
+import io.druid.guice.annotations.Json;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/** Checks JSON round-tripping (once per parameterized filter) and cache keys of {@link BoundDimFilter}. */
+@RunWith(Parameterized.class)
+public class BoundDimFilterTests
+{
+  private final BoundDimFilter boundDimFilter;
+
+  public BoundDimFilterTests(BoundDimFilter boundDimFilter) {this.boundDimFilter = boundDimFilter;}
+
+  @Parameterized.Parameters
+  public static Iterable<Object[]> constructorFeeder()
+  {
+    return ImmutableList.of(new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, null)},
+                            new Object[]{new BoundDimFilter("dimension", "12", "15", null, true, false)},
+                            new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, true)},
+                            new Object[]{new BoundDimFilter("dimension", null, "15", null, true, true)},
+                            new Object[]{new BoundDimFilter("dimension", "12", "15", true, null, null)},
+                            new Object[]{new BoundDimFilter("dimension", "12", null, true, null, true)},
+                            new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, true)},
+                            new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, false)});
+  }
+
+  @Test
+  public void testSerDesBoundFilter() throws IOException
+  {
+    Injector defaultInjector = GuiceInjectors.makeStartupInjector();
+    ObjectMapper mapper = defaultInjector.getInstance(Key.get(ObjectMapper.class, Json.class));
+    String serBetweenDimFilter = mapper.writeValueAsString(boundDimFilter);
+    BoundDimFilter actualBoundDimFilter = mapper.reader(DimFilter.class).readValue(serBetweenDimFilter);
+    Assert.assertEquals(boundDimFilter, actualBoundDimFilter);
+  }
+
+  @Test
+  public void testGetCacheKey()
+  {
+    BoundDimFilter filter = new BoundDimFilter("dimension", "12", "15", null, null, true);
+    BoundDimFilter sameFilter = new BoundDimFilter("dimension", "12", "15", false, false, true);
+    Assert.assertArrayEquals(filter.getCacheKey(), sameFilter.getCacheKey());
+    BoundDimFilter differentFilter = new BoundDimFilter("dimension", "12", "15", true, null, false);
+    Assert.assertFalse(Arrays.equals(differentFilter.getCacheKey(), filter.getCacheKey()));
+  }
+}
diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java
index 8b3645e051e..7ec34b473a5 100644
--- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java
@@ -38,6 +38,7 @@ import io.druid.query.aggregation.FilteredAggregatorFactory;
 import io.druid.query.aggregation.LongSumAggregatorFactory;
 import io.druid.query.aggregation.PostAggregator;
 import io.druid.query.filter.AndDimFilter;
+import io.druid.query.filter.BoundDimFilter;
 import io.druid.query.filter.DimFilter;
 import io.druid.query.filter.InDimFilter;
 import io.druid.query.filter.NotDimFilter;
@@ -2122,4 +2123,83 @@ public class TimeseriesQueryRunnerTest
 
     TestHelper.assertExpectedResults(expectedResults, actualResults);
   }
+
+  @Test
+  public void testTimeseriesWithBetweenFilter1()
+  {
+    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+                                  .dataSource(QueryRunnerTestHelper.dataSource)
+                                  .granularity(QueryRunnerTestHelper.dayGran)
+                                  .filters(
+                                      new AndDimFilter(
+                                          Arrays.asList(
+                                              new BoundDimFilter( // "spa" < market <= "spot"
+                                                  QueryRunnerTestHelper.marketDimension,
+                                                  "spa",
+                                                  "spot",
+                                                  true,
+                                                  null,
+                                                  null
+                                              ),
+                                              new BoundDimFilter( // "spot" <= market < "spotify"
+                                                  QueryRunnerTestHelper.marketDimension,
+                                                  "spot",
+                                                  "spotify",
+                                                  null,
+                                                  true,
+                                                  null
+                                              ),
+                                              (DimFilter) new BoundDimFilter( // "SPOT" <= market <= "spot"
+                                                  QueryRunnerTestHelper.marketDimension,
+                                                  "SPOT",
+                                                  "spot",
+                                                  null,
+                                                  null,
+                                                  null
+                                              )
+                                          )
+                                      )
+                                  )
+                                  .intervals(QueryRunnerTestHelper.firstToThird)
+                                  .aggregators(
+                                      Arrays.asList(
+                                          QueryRunnerTestHelper.rowsCount,
+                                          QueryRunnerTestHelper.indexLongSum,
+                                          QueryRunnerTestHelper.qualityUniques
+                                      )
+                                  )
+                                  .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant))
+                                  .build();
+
+    List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-04-01"),
+            new TimeseriesResultValue(
+                ImmutableMap.<String, Object>of(
+                    "rows", 9L,
+                    "index", 1102L,
+                    "addRowsIndexConstant", 1112.0,
+                    "uniques", QueryRunnerTestHelper.UNIQUES_9
+                )
+            )
+        ),
+        new Result<>(
+            new DateTime("2011-04-02"),
+            new TimeseriesResultValue(
+                ImmutableMap.<String, Object>of(
+                    "rows", 9L,
+                    "index", 1120L,
+                    "addRowsIndexConstant", 1130.0,
+                    "uniques", QueryRunnerTestHelper.UNIQUES_9
+                )
+            )
+        )
+    );
+
+    Iterable<Result<TimeseriesResultValue>> results = Sequences.toList(
+        runner.run(query, CONTEXT),
+        Lists.<Result<TimeseriesResultValue>>newArrayList()
+    );
+    TestHelper.assertExpectedResults(expectedResults, results);
+  }
 }