Merge pull request #1936 from b-slim/between_range_with_predicat

adding Upper/Lower Bound Filter
This commit is contained in:
Fangjin Yang 2015-12-29 10:11:22 -08:00
commit e14ad74088
8 changed files with 503 additions and 2 deletions

View File

@ -151,6 +151,101 @@ The grammar for a IN filter is as follows:
}
```
### Bound filter
The bound filter can be used to filter by comparing dimension values to an upper value and/or a lower value.
By default, comparison is string based and **case sensitive**.
To use numeric comparison, set `alphaNumeric` to `true`.
By default, the bounds are inclusive (not strict): `inputString <= upper && inputString >= lower`.
The grammar for a bound filter is as follows:
```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"upper": "31" ,
"alphaNumeric": true
}
```
Equivalent to retain column if `21 <= age <= 31`
```json
{
"type": "bound",
"dimension": "name",
"lower": "foo",
"upper": "hoo"
}
```
Equivalent to retain column if `foo <= name <= hoo`
To make a bound strict (exclusive), set `lowerStrict` and/or `upperStrict` to `true`.
To have both bounds strict:
```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"lowerStrict": true,
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```
Equivalent to retain column if `21 < age < 31`
To have strict upper bound:
```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```
Equivalent to retain column if `21 <= age < 31`
To compare against only an upper bound or only a lower bound:
```json
{
"type": "bound",
"dimension": "age",
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```
Equivalent to retain column if `age < 31`
```json
{
"type": "bound",
"dimension": "age",
"lower": "18" ,
"alphaNumeric": true
}
```
Equivalent to retain column if ` 18 <= age`
For the `alphaNumeric` comparator, if the dimension value includes non-digit characters you may see **fuzzy matching**.
If the dimension value starts with a non-digit character, the filter will consider it out of range (`value < lowerBound` and `value > upperBound`).
If the dimension value starts with a digit and contains non-digit characters, the comparison is done character-wise.
For instance, suppose the lower bound is `100` and the value is `10K`: the filter will match (`100 < 10K` returns `true`) since `K` is greater than any digit.
Now suppose the lower bound is `110`: the filter will not match (`110 < 10K` returns `false`).
#### Search Query Spec
##### Insensitive Contains

View File

@ -0,0 +1,172 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.metamx.common.StringUtils;
import java.nio.ByteBuffer;
/**
 * JSON-serializable description of a "bound" filter: a dimension name together with an optional
 * lower bound, an optional upper bound, one strictness flag per bound and a flag selecting
 * alphanumeric comparison. At least one of the two bounds must be supplied.
 */
public class BoundDimFilter implements DimFilter
{
  private final String dimension;
  private final String upper;
  private final String lower;
  private final boolean lowerStrict;
  private final boolean upperStrict;
  private final boolean alphaNumeric;

  /**
   * @param dimension    name of the dimension to filter on; must not be null
   * @param lower        lower bound, or null when only an upper bound is wanted
   * @param upper        upper bound, or null when only a lower bound is wanted
   * @param lowerStrict  strictness flag for the lower bound; null is treated as false
   * @param upperStrict  strictness flag for the upper bound; null is treated as false
   * @param alphaNumeric alphanumeric-comparison flag; null is treated as false
   *
   * @throws NullPointerException  if {@code dimension} is null
   * @throws IllegalStateException if {@code lower} and {@code upper} are both null
   */
  @JsonCreator
  public BoundDimFilter(
      @JsonProperty("dimension") String dimension,
      @JsonProperty("lower") String lower,
      @JsonProperty("upper") String upper,
      @JsonProperty("lowerStrict") Boolean lowerStrict,
      @JsonProperty("upperStrict") Boolean upperStrict,
      @JsonProperty("alphaNumeric") Boolean alphaNumeric
  )
  {
    this.dimension = Preconditions.checkNotNull(dimension, "dimension can not be null");
    Preconditions.checkState(
        !(lower == null && upper == null),
        "lower and upper can not be null at the same time"
    );
    this.upper = upper;
    this.lower = lower;
    // A missing (null) flag means "false".
    this.lowerStrict = lowerStrict != null && lowerStrict;
    this.upperStrict = upperStrict != null && upperStrict;
    this.alphaNumeric = alphaNumeric != null && alphaNumeric;
  }

  @JsonProperty
  public String getDimension()
  {
    return dimension;
  }

  @JsonProperty
  public String getUpper()
  {
    return upper;
  }

  @JsonProperty
  public String getLower()
  {
    return lower;
  }

  @JsonProperty
  public boolean isLowerStrict()
  {
    return lowerStrict;
  }

  @JsonProperty
  public boolean isUpperStrict()
  {
    return upperStrict;
  }

  @JsonProperty
  public boolean isAlphaNumeric()
  {
    return alphaNumeric;
  }

  /**
   * Builds the cache key for this filter. The layout is: cache id, bound type
   * (0x1 both bounds, 0x2 no lower bound, 0x3 no upper bound), upperStrict flag, lowerStrict flag,
   * alphaNumeric flag, separator, dimension bytes, separator, upper bytes, separator, lower bytes.
   * A missing bound contributes zero bytes.
   */
  @Override
  public byte[] getCacheKey()
  {
    final byte[] dimensionBytes = StringUtils.toUtf8(getDimension());
    final byte[] lowerBytes = utf8OrEmpty(getLower());
    final byte[] upperBytes = utf8OrEmpty(getUpper());

    final byte boundType = (byte) (getLower() == null ? 0x2 : (getUpper() == null ? 0x3 : 0x1));
    final byte lowerStrictByte = flagByte(isLowerStrict());
    final byte upperStrictByte = flagByte(isUpperStrict());
    final byte alphaNumericByte = flagByte(isAlphaNumeric());

    // 8 fixed bytes: cache id + bound type + three flags + three separators.
    final int fixedLength = 8;
    final ByteBuffer key = ByteBuffer.allocate(
        fixedLength + dimensionBytes.length + upperBytes.length + lowerBytes.length
    );
    key.put(DimFilterCacheHelper.BOUND_CACHE_ID);
    key.put(boundType);
    key.put(upperStrictByte);
    key.put(lowerStrictByte);
    key.put(alphaNumericByte);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(dimensionBytes);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(upperBytes);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(lowerBytes);
    return key.array();
  }

  @Override
  public boolean equals(Object o)
  {
    if (o == this) {
      return true;
    }
    if (!(o instanceof BoundDimFilter)) {
      return false;
    }
    final BoundDimFilter other = (BoundDimFilter) o;
    return isLowerStrict() == other.isLowerStrict()
           && isUpperStrict() == other.isUpperStrict()
           && isAlphaNumeric() == other.isAlphaNumeric()
           && getDimension().equals(other.getDimension())
           && sameString(getUpper(), other.getUpper())
           && sameString(getLower(), other.getLower());
  }

  @Override
  public int hashCode()
  {
    int hash = getDimension().hashCode();
    hash = 31 * hash + hashOrZero(getUpper());
    hash = 31 * hash + hashOrZero(getLower());
    hash = 31 * hash + (isLowerStrict() ? 1 : 0);
    hash = 31 * hash + (isUpperStrict() ? 1 : 0);
    hash = 31 * hash + (isAlphaNumeric() ? 1 : 0);
    return hash;
  }

  /** UTF-8 bytes of {@code value}, or an empty array when {@code value} is null. */
  private static byte[] utf8OrEmpty(String value)
  {
    return value == null ? new byte[0] : StringUtils.toUtf8(value);
  }

  /** Encodes a boolean flag as a single byte: 1 for true, 0 for false. */
  private static byte flagByte(boolean flag)
  {
    return (byte) (flag ? 1 : 0);
  }

  /** Null-safe string equality: two nulls are equal, a null never equals a non-null. */
  private static boolean sameString(String a, String b)
  {
    return a == null ? b == null : a.equals(b);
  }

  /** Hash code of {@code value}, or 0 when {@code value} is null. */
  private static int hashOrZero(String value)
  {
    return value == null ? 0 : value.hashCode();
  }
}

View File

@ -35,7 +35,9 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name="search", value=SearchQueryDimFilter.class),
@JsonSubTypes.Type(name="javascript", value=JavaScriptDimFilter.class),
@JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class),
@JsonSubTypes.Type(name="in", value=InDimFilter.class)
@JsonSubTypes.Type(name="in", value=InDimFilter.class),
@JsonSubTypes.Type(name="bound", value=BoundDimFilter.class)
})
public interface DimFilter
{

View File

@ -38,6 +38,7 @@ class DimFilterCacheHelper
static final byte SPATIAL_CACHE_ID = 0x8;
static final byte IN_CACHE_ID = 0x9;
static final byte STRING_SEPARATOR = (byte) 0xFF;
// Cache id for the bound filter. Declared final like the sibling cache ids so it cannot be reassigned at runtime.
static final byte BOUND_CACHE_ID = 0xA;
static byte[] computeCacheKey(byte cacheIdKey, List<DimFilter> filters)
{

View File

@ -0,0 +1,70 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment.filter;
import com.google.common.base.Predicate;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.topn.AlphaNumericTopNMetricSpec;
import io.druid.query.topn.LexicographicTopNMetricSpec;
import java.util.Comparator;
/**
 * Segment-level filter for a {@link BoundDimFilter}: a dimension value matches when it lies within
 * the configured lower and/or upper bound. Each bound is inclusive unless its strict flag is set,
 * and a missing bound imposes no constraint. Null dimension values never match.
 */
public class BoundFilter extends DimensionPredicateFilter
{
  /**
   * @param boundDimFilter the bound specification (dimension, bounds, strictness, comparison mode)
   */
  public BoundFilter(final BoundDimFilter boundDimFilter)
  {
    super(boundDimFilter.getDimension(), new BoundPredicate(boundDimFilter));
  }

  /**
   * Predicate that tests a single dimension value against the bounds. The comparator and the bound
   * settings are resolved once at construction instead of on every {@link #apply(String)} call,
   * since the predicate is evaluated once per dimension value.
   */
  private static final class BoundPredicate implements Predicate<String>
  {
    private final Comparator<String> comparator;
    private final String lower;
    private final String upper;
    private final boolean lowerStrict;
    private final boolean upperStrict;

    BoundPredicate(BoundDimFilter boundDimFilter)
    {
      if (boundDimFilter.isAlphaNumeric()) {
        this.comparator = new AlphaNumericTopNMetricSpec(null).getComparator(null, null);
      } else {
        this.comparator = new LexicographicTopNMetricSpec(null).getComparator(null, null);
      }
      this.lower = boundDimFilter.getLower();
      this.upper = boundDimFilter.getUpper();
      this.lowerStrict = boundDimFilter.isLowerStrict();
      this.upperStrict = boundDimFilter.isUpperStrict();
    }

    @Override
    public boolean apply(String input)
    {
      if (input == null) {
        return false;
      }
      if (lower != null) {
        // Positive when input is above the lower bound, zero when equal to it.
        final int lowerComparing = comparator.compare(input, lower);
        if (lowerStrict ? lowerComparing <= 0 : lowerComparing < 0) {
          return false;
        }
      }
      if (upper != null) {
        // Positive when input is below the upper bound, zero when equal to it.
        final int upperComparing = comparator.compare(upper, input);
        if (upperStrict ? upperComparing <= 0 : upperComparing < 0) {
          return false;
        }
      }
      return true;
    }
  }
}

View File

@ -22,6 +22,7 @@ package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.Filter;
@ -41,7 +42,8 @@ import java.util.List;
*/
public class Filters
{
public static List<Filter> convertDimensionFilters(List<DimFilter> filters){
public static List<Filter> convertDimensionFilters(List<DimFilter> filters)
{
return Lists.transform(
filters,
new Function<DimFilter, Filter>()
@ -111,6 +113,8 @@ public class Filters
);
filter = new OrFilter(listFilters);
} else if (dimFilter instanceof BoundDimFilter) {
filter = new BoundFilter((BoundDimFilter) dimFilter);
}
return filter;

View File

@ -0,0 +1,77 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.inject.Injector;
import com.google.inject.Key;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.annotations.Json;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.IOException;
import java.util.Arrays;
/**
 * Parameterized tests for {@link BoundDimFilter}: every filter produced by
 * {@link #constructorFeeder()} is run through the test methods below.
 */
@RunWith(Parameterized.class)
public class BoundDimFilterTests
{
  private final BoundDimFilter boundDimFilter;

  public BoundDimFilterTests(BoundDimFilter boundDimFilter)
  {
    this.boundDimFilter = boundDimFilter;
  }

  /**
   * Supplies filters covering both bounds, a single bound, and several combinations of the
   * null / true / false strictness and alphaNumeric flags.
   */
  @Parameterized.Parameters
  public static Iterable<Object[]> constructorFeeder()
  {
    return ImmutableList.of(
        new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, null)},
        new Object[]{new BoundDimFilter("dimension", "12", "15", null, true, false)},
        new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, true)},
        new Object[]{new BoundDimFilter("dimension", null, "15", null, true, true)},
        new Object[]{new BoundDimFilter("dimension", "12", "15", true, null, null)},
        new Object[]{new BoundDimFilter("dimension", "12", null, true, null, true)},
        new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, true)},
        new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, false)}
    );
  }

  /** Serializes the parameter filter to JSON and checks that reading it back yields an equal filter. */
  @Test
  public void testSerDesBoundFilter() throws IOException
  {
    final Injector injector = GuiceInjectors.makeStartupInjector();
    final ObjectMapper jsonMapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));

    final String json = jsonMapper.writeValueAsString(boundDimFilter);
    final BoundDimFilter roundTripped = jsonMapper.reader(DimFilter.class).readValue(json);

    Assert.assertEquals(boundDimFilter, roundTripped);
  }

  /**
   * Checks that null flags and explicit {@code false} flags give the same cache key, and that
   * filters with different flag values give different cache keys.
   */
  @Test
  public void testGetCacheKey()
  {
    final BoundDimFilter withNullFlags = new BoundDimFilter("dimension", "12", "15", null, null, true);
    final BoundDimFilter withFalseFlags = new BoundDimFilter("dimension", "12", "15", false, false, true);
    Assert.assertArrayEquals(withNullFlags.getCacheKey(), withFalseFlags.getCacheKey());

    final BoundDimFilter withOtherFlags = new BoundDimFilter("dimension", "12", "15", true, null, false);
    Assert.assertFalse(Arrays.equals(withOtherFlags.getCacheKey(), withNullFlags.getCacheKey()));
  }
}

View File

@ -38,6 +38,7 @@ import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.InDimFilter;
import io.druid.query.filter.NotDimFilter;
@ -2122,4 +2123,83 @@ public class TimeseriesQueryRunnerTest
TestHelper.assertExpectedResults(expectedResults, actualResults);
}
/**
 * Runs a day-granularity timeseries query whose filter is the AND of three bound filters on the
 * market dimension: ("spa", "spot"] with a strict lower bound, ["spot", "spotify") with a strict
 * upper bound, and ["SPOT", "spot"] with default flags. The results are compared with the expected
 * per-day aggregates for the first two days of April 2011.
 */
@Test
public void testTimeseriesWithBetweenFilter1()
{
  final String market = QueryRunnerTestHelper.marketDimension;
  final DimFilter aboveSpaUpToSpot = new BoundDimFilter(market, "spa", "spot", true, null, null);
  final DimFilter fromSpotBelowSpotify = new BoundDimFilter(market, "spot", "spotify", null, true, null);
  final DimFilter fromUpperCaseSpotToSpot = new BoundDimFilter(market, "SPOT", "spot", null, null, null);
  final AndDimFilter allBounds = new AndDimFilter(
      Arrays.<DimFilter>asList(aboveSpaUpToSpot, fromSpotBelowSpotify, fromUpperCaseSpotToSpot)
  );

  final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
                                      .dataSource(QueryRunnerTestHelper.dataSource)
                                      .granularity(QueryRunnerTestHelper.dayGran)
                                      .filters(allBounds)
                                      .intervals(QueryRunnerTestHelper.firstToThird)
                                      .aggregators(
                                          Arrays.<AggregatorFactory>asList(
                                              QueryRunnerTestHelper.rowsCount,
                                              QueryRunnerTestHelper.indexLongSum,
                                              QueryRunnerTestHelper.qualityUniques
                                          )
                                      )
                                      .postAggregators(
                                          Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)
                                      )
                                      .build();

  final Result<TimeseriesResultValue> firstDay = new Result<>(
      new DateTime("2011-04-01"),
      new TimeseriesResultValue(
          ImmutableMap.<String, Object>of(
              "rows", 9L,
              "index", 1102L,
              "addRowsIndexConstant", 1112.0,
              "uniques", QueryRunnerTestHelper.UNIQUES_9
          )
      )
  );
  final Result<TimeseriesResultValue> secondDay = new Result<>(
      new DateTime("2011-04-02"),
      new TimeseriesResultValue(
          ImmutableMap.<String, Object>of(
              "rows", 9L,
              "index", 1120L,
              "addRowsIndexConstant", 1130.0,
              "uniques", QueryRunnerTestHelper.UNIQUES_9
          )
      )
  );
  final List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(firstDay, secondDay);

  final Iterable<Result<TimeseriesResultValue>> actualResults = Sequences.toList(
      runner.run(query, CONTEXT),
      Lists.<Result<TimeseriesResultValue>>newArrayList()
  );
  TestHelper.assertExpectedResults(expectedResults, actualResults);
}
}