mirror of
https://github.com/apache/druid.git
synced 2025-03-08 10:30:38 +00:00
Add bucket extraction function (#3033)
* add bucket extraction function * add doc and header * updated doc and test
This commit is contained in:
parent
8c76fe7b97
commit
12be1c0a4b
@ -414,6 +414,23 @@ or without setting "locale" (in this case, the current value of the default loca
|
||||
}
|
||||
```
|
||||
|
||||
### Bucket Extraction Function
|
||||
|
||||
The bucket extraction function groups numerical values into buckets (ranges) of the given size and converts every value in a bucket to the same base value, the lower bound of that bucket. Non-numeric values are converted to null.
|
||||
|
||||
* `size` : the size of the buckets (optional, default 1)
|
||||
* `offset` : the offset for the buckets (optional, default 0)
|
||||
|
||||
The following extraction function creates buckets of 5 starting from 2. In this case, values in the range of [2, 7) will be converted to 2, values in [7, 12) will be converted to 7, etc.
|
||||
|
||||
```json
|
||||
{
|
||||
"type" : "bucket",
|
||||
"size" : 5,
|
||||
"offset" : 2
|
||||
}
|
||||
```
|
||||
|
||||
### Lookup DimensionSpecs
|
||||
|
||||
<div class="note caution">
|
||||
|
@ -0,0 +1,130 @@
|
||||
/*
|
||||
*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package io.druid.query.extraction;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.primitives.Doubles;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
public class BucketExtractionFn implements ExtractionFn
|
||||
{
|
||||
|
||||
private final double size;
|
||||
private final double offset;
|
||||
|
||||
@JsonCreator
|
||||
public BucketExtractionFn(
|
||||
@Nullable
|
||||
@JsonProperty("size") Double size,
|
||||
@Nullable
|
||||
@JsonProperty("offset") Double offset
|
||||
)
|
||||
{
|
||||
this.size = size == null ? 1 : size;
|
||||
this.offset = offset == null ? 0 : offset;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double getSize()
|
||||
{
|
||||
return size;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public double getOffset()
|
||||
{
|
||||
return offset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(Object value)
|
||||
{
|
||||
if (value instanceof Number) {
|
||||
return bucket((Double) value);
|
||||
} else if (value instanceof String) {
|
||||
return apply(value);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(String value)
|
||||
{
|
||||
try {
|
||||
return bucket(Double.parseDouble(value));
|
||||
} catch (NumberFormatException | NullPointerException ex) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String apply(long value)
|
||||
{
|
||||
return bucket(value);
|
||||
}
|
||||
|
||||
private String bucket(double value) {
|
||||
double ret = Math.floor((value - offset) / size) * size + offset;
|
||||
return ret == (long)ret ? String.valueOf((long)ret) : String.valueOf(ret);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean preservesOrdering()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExtractionType getExtractionType()
|
||||
{
|
||||
return ExtractionType.MANY_TO_ONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getCacheKey()
|
||||
{
|
||||
return ByteBuffer.allocate(1 + 2 * Doubles.BYTES)
|
||||
.put(ExtractionCacheHelper.CACHE_TYPE_ID_BUCKET)
|
||||
.putDouble(size)
|
||||
.putDouble(offset)
|
||||
.array();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BucketExtractionFn that = (BucketExtractionFn) o;
|
||||
|
||||
return size == that.size && offset == that.offset;
|
||||
|
||||
}
|
||||
}
|
@ -37,4 +37,5 @@ public class ExtractionCacheHelper
|
||||
public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA;
|
||||
public static final byte CACHE_TYPE_ID_UPPER = 0xB;
|
||||
public static final byte CACHE_TYPE_ID_LOWER = 0xC;
|
||||
public static final byte CACHE_TYPE_ID_BUCKET = 0xD;
|
||||
}
|
||||
|
@ -41,7 +41,8 @@ import io.druid.query.lookup.RegisteredLookupExtractionFn;
|
||||
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class)
|
||||
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class),
|
||||
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class)
|
||||
})
|
||||
/**
|
||||
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
|
||||
|
@ -0,0 +1,87 @@
|
||||
/*
|
||||
*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
 *
|
||||
*/
|
||||
|
||||
package io.druid.query.extraction;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import io.druid.jackson.DefaultObjectMapper;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class BucketExtractionFnTest
|
||||
{
|
||||
private static final double DELTA = 0.0000001;
|
||||
|
||||
@Test
|
||||
public void testApply()
|
||||
{
|
||||
BucketExtractionFn extractionFn1 = new BucketExtractionFn(100.0, 0.5);
|
||||
Assert.assertEquals("1200.5", extractionFn1.apply("1234.99"));
|
||||
Assert.assertEquals("0.5", extractionFn1.apply("1"));
|
||||
Assert.assertEquals("0.5", extractionFn1.apply("100"));
|
||||
Assert.assertEquals("500.5", extractionFn1.apply(501));
|
||||
Assert.assertEquals("-399.5", extractionFn1.apply("-325"));
|
||||
Assert.assertEquals("2400.5", extractionFn1.apply("2.42e3"));
|
||||
Assert.assertEquals("-99.5", extractionFn1.apply("1.2e-1"));
|
||||
Assert.assertEquals(null, extractionFn1.apply("should be null"));
|
||||
Assert.assertEquals(null, extractionFn1.apply(""));
|
||||
|
||||
BucketExtractionFn extractionFn2 = new BucketExtractionFn(3.0, 2.0);
|
||||
Assert.assertEquals("2", extractionFn2.apply("2"));
|
||||
Assert.assertEquals("2", extractionFn2.apply("3"));
|
||||
Assert.assertEquals("2", extractionFn2.apply("4.22"));
|
||||
Assert.assertEquals("-10", extractionFn2.apply("-8"));
|
||||
Assert.assertEquals("71", extractionFn2.apply("7.1e1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerde() throws Exception
|
||||
{
|
||||
final ObjectMapper objectMapper = new DefaultObjectMapper();
|
||||
|
||||
final String json1 = "{ \"type\" : \"bucket\", \"size\" : \"2\", \"offset\" : \"0.5\" }";
|
||||
BucketExtractionFn extractionFn1 = (BucketExtractionFn)objectMapper.readValue(json1, ExtractionFn.class);
|
||||
Assert.assertEquals(2, extractionFn1.getSize(), DELTA);
|
||||
Assert.assertEquals(0.5, extractionFn1.getOffset(), DELTA);
|
||||
|
||||
Assert.assertEquals(
|
||||
extractionFn1,
|
||||
objectMapper.readValue(
|
||||
objectMapper.writeValueAsBytes(extractionFn1),
|
||||
ExtractionFn.class
|
||||
)
|
||||
);
|
||||
|
||||
final String json2 = "{ \"type\" : \"bucket\"}";
|
||||
BucketExtractionFn extractionFn2 = (BucketExtractionFn)objectMapper.readValue(json2, ExtractionFn.class);
|
||||
Assert.assertEquals(1, extractionFn2.getSize(), DELTA);
|
||||
Assert.assertEquals(0, extractionFn2.getOffset(), DELTA);
|
||||
|
||||
Assert.assertEquals(
|
||||
extractionFn2,
|
||||
objectMapper.readValue(
|
||||
objectMapper.writeValueAsBytes(extractionFn2),
|
||||
ExtractionFn.class
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user