Add bucket extraction function (#3033)

* add bucket extraction function

* add doc and header

* updated doc and test
This commit is contained in:
Dave Li 2016-06-17 12:24:27 -04:00 committed by Fangjin Yang
parent 8c76fe7b97
commit 12be1c0a4b
5 changed files with 237 additions and 1 deletions

View File

@ -414,6 +414,23 @@ or without setting "locale" (in this case, the current value of the default loca
}
```
### Bucket Extraction Function
The bucket extraction function groups numerical values into ranges (buckets) of the given size by converting every value in a bucket to the same base value, which is the lower bound of that bucket. Non-numeric values are converted to null.
* `size` : the size of the buckets (optional, default 1)
* `offset` : the offset for the buckets (optional, default 0)
The following extraction function creates buckets of 5 starting from 2. In this case, values in the range of [2, 7) will be converted to 2, values in [7, 12) will be converted to 7, etc.
```json
{
"type" : "bucket",
"size" : 5,
"offset" : 2
}
```
### Lookup DimensionSpecs
<div class="note caution">

View File

@ -0,0 +1,130 @@
/*
*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.primitives.Doubles;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
/**
 * Extraction function that maps a numeric value to the lower bound of the fixed-width
 * bucket that contains it.
 *
 * <p>Buckets are {@code size} wide and aligned so that one bucket starts at {@code offset};
 * a value {@code v} is mapped to {@code floor((v - offset) / size) * size + offset}.
 * Values that cannot be interpreted as a number are mapped to {@code null}.
 *
 * <p>NOTE(review): {@code size} is not validated; a size of zero makes the bucket
 * computation divide by zero and produce non-finite results.
 */
public class BucketExtractionFn implements ExtractionFn
{
  private final double size;
  private final double offset;

  /**
   * @param size   width of each bucket; defaults to 1 when {@code null}
   * @param offset starting point the buckets are aligned to; defaults to 0 when {@code null}
   */
  @JsonCreator
  public BucketExtractionFn(
      @Nullable
      @JsonProperty("size") Double size,
      @Nullable
      @JsonProperty("offset") Double offset
  )
  {
    this.size = size == null ? 1 : size;
    this.offset = offset == null ? 0 : offset;
  }

  @JsonProperty
  public double getSize()
  {
    return size;
  }

  @JsonProperty
  public double getOffset()
  {
    return offset;
  }

  /**
   * Buckets an arbitrary object.
   *
   * @param value a {@link Number}, a {@link String} holding a number, or anything else
   * @return the bucket's base value as a string, or {@code null} if {@code value} is
   *         {@code null} or is neither a number nor a parseable numeric string
   */
  @Override
  public String apply(Object value)
  {
    if (value instanceof Number) {
      // Go through Number#doubleValue so Integer, Long, Float, etc. work too;
      // a direct cast to Double throws ClassCastException for every other Number type.
      return bucket(((Number) value).doubleValue());
    } else if (value instanceof String) {
      // The explicit cast selects the String overload. Without it the call binds to
      // apply(Object) again and recurses until the stack overflows.
      return apply((String) value);
    }
    return null;
  }

  /**
   * Buckets a value given in its string form.
   *
   * @param value textual representation of a number, may be {@code null}
   * @return the bucket's base value as a string, or {@code null} if {@code value} is
   *         {@code null} or cannot be parsed by {@link Double#parseDouble(String)}
   */
  @Override
  public String apply(String value)
  {
    if (value == null) {
      return null;
    }
    try {
      return bucket(Double.parseDouble(value));
    }
    catch (NumberFormatException ex) {
      // Non-numeric input is expected here and is mapped to null by contract.
      return null;
    }
  }

  /**
   * Buckets a long value; the value is widened to {@code double} before bucketing.
   */
  @Override
  public String apply(long value)
  {
    return bucket(value);
  }

  /**
   * Computes the lower bound of the bucket containing {@code value} and renders it as a
   * string, without a fractional part when the bound is a whole number.
   */
  private String bucket(double value)
  {
    double ret = Math.floor((value - offset) / size) * size + offset;
    // Print whole numbers as "5" rather than "5.0".
    return ret == (long) ret ? String.valueOf((long) ret) : String.valueOf(ret);
  }

  @Override
  public boolean preservesOrdering()
  {
    return false;
  }

  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  /**
   * @return the bucket cache type id followed by the binary forms of {@code size} and
   *         {@code offset}
   */
  @Override
  public byte[] getCacheKey()
  {
    return ByteBuffer.allocate(1 + 2 * Doubles.BYTES)
                     .put(ExtractionCacheHelper.CACHE_TYPE_ID_BUCKET)
                     .putDouble(size)
                     .putDouble(offset)
                     .array();
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    BucketExtractionFn that = (BucketExtractionFn) o;

    // Double.compare keeps equals reflexive for NaN and consistent with hashCode(),
    // which is based on the bit representation of the doubles.
    return Double.compare(that.size, size) == 0
           && Double.compare(that.offset, offset) == 0;
  }

  @Override
  public int hashCode()
  {
    long bits = Double.doubleToLongBits(size);
    int result = (int) (bits ^ (bits >>> 32));
    bits = Double.doubleToLongBits(offset);
    result = 31 * result + (int) (bits ^ (bits >>> 32));
    return result;
  }
}

View File

@ -37,4 +37,5 @@ public class ExtractionCacheHelper
public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA;
public static final byte CACHE_TYPE_ID_UPPER = 0xB;
public static final byte CACHE_TYPE_ID_LOWER = 0xC;
public static final byte CACHE_TYPE_ID_BUCKET = 0xD;
}

View File

@ -41,7 +41,8 @@ import io.druid.query.lookup.RegisteredLookupExtractionFn;
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class),
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class),
@JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class),
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class)
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class),
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)

View File

@ -0,0 +1,87 @@
/*
*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* /
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link BucketExtractionFn}: bucketing of string and long inputs, and
 * JSON (de)serialization with explicit and default parameters.
 */
public class BucketExtractionFnTest
{
  private static final double DELTA = 0.0000001;

  @Test
  public void testApply()
  {
    // Buckets of width 100 aligned at 0.5: ..., [-99.5, 0.5), [0.5, 100.5), ...
    final BucketExtractionFn wideBuckets = new BucketExtractionFn(100.0, 0.5);

    Assert.assertEquals("1200.5", wideBuckets.apply("1234.99"));
    Assert.assertEquals("0.5", wideBuckets.apply("1"));
    Assert.assertEquals("0.5", wideBuckets.apply("100"));
    Assert.assertEquals("500.5", wideBuckets.apply(501));
    Assert.assertEquals("-399.5", wideBuckets.apply("-325"));
    Assert.assertEquals("2400.5", wideBuckets.apply("2.42e3"));
    Assert.assertEquals("-99.5", wideBuckets.apply("1.2e-1"));

    // Input that is not a number yields null.
    Assert.assertNull(wideBuckets.apply("should be null"));
    Assert.assertNull(wideBuckets.apply(""));

    // Buckets of width 3 aligned at 2; whole-number bounds print without a fraction.
    final BucketExtractionFn narrowBuckets = new BucketExtractionFn(3.0, 2.0);

    Assert.assertEquals("2", narrowBuckets.apply("2"));
    Assert.assertEquals("2", narrowBuckets.apply("3"));
    Assert.assertEquals("2", narrowBuckets.apply("4.22"));
    Assert.assertEquals("-10", narrowBuckets.apply("-8"));
    Assert.assertEquals("71", narrowBuckets.apply("7.1e1"));
  }

  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper mapper = new DefaultObjectMapper();

    // Explicit size and offset.
    final String explicitJson = "{ \"type\" : \"bucket\", \"size\" : \"2\", \"offset\" : \"0.5\" }";
    final BucketExtractionFn explicitFn =
        (BucketExtractionFn) mapper.readValue(explicitJson, ExtractionFn.class);
    Assert.assertEquals(2, explicitFn.getSize(), DELTA);
    Assert.assertEquals(0.5, explicitFn.getOffset(), DELTA);
    assertSurvivesRoundTrip(mapper, explicitFn);

    // Omitted size and offset fall back to the defaults 1 and 0.
    final String defaultsJson = "{ \"type\" : \"bucket\"}";
    final BucketExtractionFn defaultsFn =
        (BucketExtractionFn) mapper.readValue(defaultsJson, ExtractionFn.class);
    Assert.assertEquals(1, defaultsFn.getSize(), DELTA);
    Assert.assertEquals(0, defaultsFn.getOffset(), DELTA);
    assertSurvivesRoundTrip(mapper, defaultsFn);
  }

  /**
   * Serializes {@code fn} to bytes, reads it back as an {@link ExtractionFn} and asserts
   * that the result equals the original.
   */
  private static void assertSurvivesRoundTrip(ObjectMapper mapper, BucketExtractionFn fn) throws Exception
  {
    final byte[] serialized = mapper.writeValueAsBytes(fn);
    Assert.assertEquals(fn, mapper.readValue(serialized, ExtractionFn.class));
  }
}