Merge pull request #2285 from fjy/stringformat

fixed #1873, add ability to express CONCAT as an extractionFn
This commit is contained in:
Fangjin Yang 2016-01-19 11:23:12 -08:00
commit 1b359d6a47
15 changed files with 289 additions and 64 deletions

View File

@ -341,6 +341,16 @@ Example for chaining [regular expression extraction function](#regular-expressio
It will transform dimension values with specified extraction functions in the order named.
For example, `'/druid/prod/historical'` is transformed to `'the dru'`: the regular expression extraction function first transforms it to `'druid'`, then the javascript extraction function transforms it to `'the druid'`, and lastly the substring extraction function transforms it to `'the dru'`.
### String Format Extraction Function
Returns the dimension value formatted according to the given format string.
```json
{ "type" : "stringFormat", "format" : <sprintf_expression> }
```
For example, if you want to concatenate "[" and "]" before and after the actual dimension value, specify "[%s]" as the format string.
### Filtering DimensionSpecs
These are only valid for multi-valued dimensions. If you have a row in druid that has a multi-valued dimension with values ["v1", "v2", "v3"] and you send a groupBy/topN query grouping by that dimension with [query filter](filter.html) for value "v1". In the response you will get 3 rows containing "v1", "v2" and "v3". This behavior might be unintuitive for some use cases.

View File

@ -29,38 +29,44 @@ import java.util.Arrays;
public class CascadeExtractionFn implements ExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x9;
private final ExtractionFn extractionFns[];
private final ChainedExtractionFn chainedExtractionFn;
private final ChainedExtractionFn DEFAULT_CHAINED_EXTRACTION_FN = new ChainedExtractionFn(
new ExtractionFn() {
public byte[] getCacheKey() {
new ExtractionFn()
{
public byte[] getCacheKey()
{
return new byte[0];
}
public String apply(Object value) {
public String apply(Object value)
{
return null;
}
public String apply(String value) {
public String apply(String value)
{
return null;
}
public String apply(long value) {
public String apply(long value)
{
return null;
}
public boolean preservesOrdering() {
public boolean preservesOrdering()
{
return false;
}
public ExtractionType getExtractionType() {
public ExtractionType getExtractionType()
{
return ExtractionType.MANY_TO_ONE;
}
@Override
public String toString() {
public String toString()
{
return "nullExtractionFn{}";
}
},
@ -78,48 +84,55 @@ public class CascadeExtractionFn implements ExtractionFn
this.chainedExtractionFn = DEFAULT_CHAINED_EXTRACTION_FN;
} else {
ChainedExtractionFn root = null;
for (int idx = 0; idx < extractionFns.length; idx++) {
Preconditions.checkArgument(extractionFns[idx] != null, "empty function is not allowed");
root = new ChainedExtractionFn(extractionFns[idx], root);
for (ExtractionFn fn : extractionFn) {
Preconditions.checkArgument(fn != null, "empty function is not allowed");
root = new ChainedExtractionFn(fn, root);
}
this.chainedExtractionFn = root;
}
}
@JsonProperty
public ExtractionFn[] getExtractionFns() {
public ExtractionFn[] getExtractionFns()
{
return extractionFns;
}
@Override
public byte[] getCacheKey() {
byte[] cacheKey = new byte[] {CACHE_TYPE_ID};
public byte[] getCacheKey()
{
byte[] cacheKey = new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_CASCADE};
return Bytes.concat(cacheKey, chainedExtractionFn.getCacheKey());
}
@Override
public String apply(Object value) {
public String apply(Object value)
{
return chainedExtractionFn.apply(value);
}
@Override
public String apply(String value){
public String apply(String value)
{
return chainedExtractionFn.apply(value);
}
@Override
public String apply(long value){
public String apply(long value)
{
return chainedExtractionFn.apply(value);
}
@Override
public boolean preservesOrdering(){
public boolean preservesOrdering()
{
return chainedExtractionFn.preservesOrdering();
}
@Override
public ExtractionType getExtractionType(){
public ExtractionType getExtractionType()
{
return chainedExtractionFn.getExtractionType();
}
@ -152,44 +165,53 @@ public class CascadeExtractionFn implements ExtractionFn
}
@Override
public String toString() {
public String toString()
{
return "CascadeExtractionFn{" +
"extractionFns=[" + chainedExtractionFn.toString() + "]}";
"extractionFns=[" + chainedExtractionFn.toString() + "]}";
}
private class ChainedExtractionFn {
private class ChainedExtractionFn
{
private final ExtractionFn fn;
private final ChainedExtractionFn child;
public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child) {
public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child)
{
this.fn = fn;
this.child = child;
}
public byte[] getCacheKey() {
public byte[] getCacheKey()
{
byte[] fnCacheKey = fn.getCacheKey();
return (child != null) ? Bytes.concat(fnCacheKey, child.getCacheKey()) : fnCacheKey;
}
public String apply(Object value) {
public String apply(Object value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}
public String apply(String value){
public String apply(String value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}
public String apply(long value){
public String apply(long value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}
public boolean preservesOrdering(){
public boolean preservesOrdering()
{
boolean childPreservesOrdering = (child == null) || child.preservesOrdering();
return fn.preservesOrdering() && childPreservesOrdering;
}
public ExtractionType getExtractionType(){
public ExtractionType getExtractionType()
{
if (child != null && child.getExtractionType() == ExtractionType.MANY_TO_ONE) {
return ExtractionType.MANY_TO_ONE;
} else {
@ -227,10 +249,11 @@ public class CascadeExtractionFn implements ExtractionFn
return result;
}
public String toString() {
public String toString()
{
return (child != null)
? Joiner.on(",").join(child.toString(), fn.toString())
: fn.toString();
? Joiner.on(",").join(child.toString(), fn.toString())
: fn.toString();
}
}
}

View File

@ -24,7 +24,7 @@ public abstract class DimExtractionFn implements ExtractionFn
@Override
public String apply(Object value)
{
return apply(value.toString());
return apply(value == null ? null : value.toString());
}
@Override

View File

@ -0,0 +1,38 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
/**
 * Central list of the one-byte type identifiers that the extraction functions in this package
 * write as the first byte of their {@code getCacheKey()} result.
 *
 * Every identifier below has a distinct value, presumably so that cache keys produced by
 * different kinds of extraction function cannot collide; pick an unused value when adding one.
 *
 * This class only holds constants: it is final and cannot be instantiated.
 */
public final class ExtractionCacheHelper
{
  public static final byte CACHE_TYPE_ID_TIME_DIM = 0x0;
  public static final byte CACHE_TYPE_ID_REGEX = 0x1;
  public static final byte CACHE_TYPE_ID_MATCHING_DIM = 0x2;
  public static final byte CACHE_TYPE_ID_SEARCH_QUERY = 0x3;
  public static final byte CACHE_TYPE_ID_JAVASCRIPT = 0x4;
  public static final byte CACHE_TYPE_ID_TIME_FORMAT = 0x5;
  public static final byte CACHE_TYPE_ID_IDENTITY = 0x6;
  public static final byte CACHE_TYPE_ID_LOOKUP = 0x7;
  public static final byte CACHE_TYPE_ID_SUBSTRING = 0x8;
  public static final byte CACHE_TYPE_ID_CASCADE = 0x9;
  public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA;

  private ExtractionCacheHelper()
  {
    // Constants holder; instantiation would be meaningless.
  }
}

View File

@ -35,7 +35,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class),
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class)
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class),
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
@ -56,7 +57,7 @@ public interface ExtractionFn
/**
* The "extraction" function. This should map a value into some other String value.
*
* <p>
* In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the
* empty string is considered invalid output for this method and should instead return null. This is
* a contract on the method rather than enforced at a lower level in order to eliminate a global check
@ -74,7 +75,7 @@ public interface ExtractionFn
/**
* Offers information on whether the extraction will preserve the original ordering of the values.
* <p/>
* <p>
* Some query optimizations are possible if ordering is preserved. Null values *do* count towards
* ordering.
*

View File

@ -23,8 +23,6 @@ import com.google.common.base.Strings;
public class IdentityExtractionFn implements ExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x6;
private static final IdentityExtractionFn instance = new IdentityExtractionFn();
private IdentityExtractionFn()
@ -35,7 +33,7 @@ public class IdentityExtractionFn implements ExtractionFn
@Override
public byte[] getCacheKey()
{
return new byte[]{CACHE_TYPE_ID};
return new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_IDENTITY};
}
@Override

View File

@ -61,8 +61,6 @@ public class JavaScriptExtractionFn implements ExtractionFn
};
}
private static final byte CACHE_TYPE_ID = 0x4;
private final String function;
private final Function<Object, String> fn;
private final boolean injective;
@ -97,7 +95,7 @@ public class JavaScriptExtractionFn implements ExtractionFn
{
byte[] bytes = StringUtils.toUtf8(function);
return ByteBuffer.allocate(1 + bytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_JAVASCRIPT)
.put(bytes)
.array();
}

View File

@ -33,8 +33,6 @@ import java.io.IOException;
public class LookupExtractionFn extends FunctionalExtraction
{
private static final byte CACHE_TYPE_ID = 0x7;
private final LookupExtractor lookup;
private final boolean optimize;
@ -98,7 +96,7 @@ public class LookupExtractionFn extends FunctionalExtraction
{
try {
final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
outputStream.write(CACHE_TYPE_ID);
outputStream.write(ExtractionCacheHelper.CACHE_TYPE_ID_LOOKUP);
outputStream.write(lookup.getCacheKey());
if (getReplaceMissingValueWith() != null) {
outputStream.write(StringUtils.toUtf8(getReplaceMissingValueWith()));

View File

@ -32,8 +32,6 @@ import java.util.regex.Pattern;
*/
public class MatchingDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x2;
private final String expr;
private final Pattern pattern;
@ -53,7 +51,7 @@ public class MatchingDimExtractionFn extends DimExtractionFn
{
byte[] exprBytes = StringUtils.toUtf8(expr);
return ByteBuffer.allocate(1 + exprBytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_MATCHING_DIM)
.put(exprBytes)
.array();
}

View File

@ -33,7 +33,6 @@ import java.util.regex.Pattern;
*/
public class RegexDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x1;
private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF;
private final String expr;
@ -75,7 +74,7 @@ public class RegexDimExtractionFn extends DimExtractionFn
totalLen += 2; // separators
return ByteBuffer.allocate(totalLen)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_REGEX)
.put(exprBytes)
.put(CACHE_KEY_SEPARATOR)
.put(replaceStrBytes)

View File

@ -30,8 +30,6 @@ import java.nio.ByteBuffer;
*/
public class SearchQuerySpecDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x3;
private final SearchQuerySpec searchQuerySpec;
@JsonCreator
@ -55,7 +53,7 @@ public class SearchQuerySpecDimExtractionFn extends DimExtractionFn
{
byte[] specBytes = searchQuerySpec.getCacheKey();
return ByteBuffer.allocate(1 + specBytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_SEARCH_QUERY)
.put(specBytes)
.array();
}

View File

@ -0,0 +1,101 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.metamx.common.StringUtils;
import java.nio.ByteBuffer;
/**
 * Extraction function that renders each dimension value through a {@link String#format} pattern,
 * e.g. the format {@code "[%s]"} turns {@code "foo"} into {@code "[foo]"}.
 *
 * The value is passed to the format as its single argument. A null value is passed through
 * unchanged, so {@code "%s"} renders it as the text {@code "null"}.
 */
public class StringFormatExtractionFn extends DimExtractionFn
{
  private final String format;

  /**
   * @param format the {@link String#format} pattern to apply; must be neither null nor empty
   *
   * @throws IllegalArgumentException if {@code format} is null or empty
   */
  @JsonCreator
  public StringFormatExtractionFn(
      @JsonProperty("format") String format
  )
  {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(format), "format string should not be empty");
    this.format = format;
  }

  /**
   * @return the format pattern this function was created with
   */
  @JsonProperty
  public String getFormat()
  {
    return format;
  }

  /**
   * @return the string-format cache type id followed by the UTF-8 bytes of the format pattern
   */
  @Override
  public byte[] getCacheKey()
  {
    byte[] bytes = StringUtils.toUtf8(format);
    return ByteBuffer.allocate(1 + bytes.length)
                     .put(ExtractionCacheHelper.CACHE_TYPE_ID_STRING_FORMAT)
                     .put(bytes)
                     .array();
  }

  /**
   * Formats {@code value} with the configured pattern.
   *
   * @param value the dimension value, possibly null
   *
   * @return the formatted string, or null when formatting yields an empty string
   */
  @Override
  public String apply(String value)
  {
    // The ExtractionFn contract treats null and "" as equivalent and requires null instead of
    // an empty result; a pattern such as "%s" applied to "" would otherwise return "".
    return Strings.emptyToNull(String.format(format, value));
  }

  @Override
  public boolean preservesOrdering()
  {
    return false;
  }

  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  /**
   * Two instances are equal when they are of the same class and have equal format patterns.
   */
  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    StringFormatExtractionFn that = (StringFormatExtractionFn) o;

    return format.equals(that.format);
  }

  @Override
  public int hashCode()
  {
    return format.hashCode();
  }
}

View File

@ -33,8 +33,6 @@ import java.util.Date;
*/
public class TimeDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x0;
private final String timeFormat;
private final SimpleDateFormat timeFormatter;
private final String resultFormat;
@ -62,7 +60,7 @@ public class TimeDimExtractionFn extends DimExtractionFn
{
byte[] timeFormatBytes = StringUtils.toUtf8(timeFormat);
return ByteBuffer.allocate(1 + timeFormatBytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_DIM)
.put(timeFormatBytes)
.array();
}

View File

@ -32,8 +32,6 @@ import java.util.Locale;
public class TimeFormatExtractionFn implements ExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x5;
private final DateTimeZone tz;
private final String pattern;
private final Locale locale;
@ -82,7 +80,7 @@ public class TimeFormatExtractionFn implements ExtractionFn
{
byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag());
return ByteBuffer.allocate(1 + exprBytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_FORMAT)
.put(exprBytes)
.array();
}
@ -102,7 +100,7 @@ public class TimeFormatExtractionFn implements ExtractionFn
@Override
public String apply(String value)
{
return apply((Object)value);
return apply((Object) value);
}
@Override

View File

@ -0,0 +1,67 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.extraction;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link StringFormatExtractionFn}: formatting of a long value, formatting of a
 * null string, and a JSON serialization round trip.
 */
public class StringFormatExtractionFnTest
{
  private static final String BRACKET_FORMAT = "[%s]";

  @Test
  public void testApply() throws Exception
  {
    // A long argument goes through the apply(long) overload.
    final long input = 1000L;
    final StringFormatExtractionFn bracketFn = new StringFormatExtractionFn(BRACKET_FORMAT);
    final String formatted = bracketFn.apply(input);
    Assert.assertEquals("[1000]", formatted);
  }

  @Test
  public void testApplyNull() throws Exception
  {
    // The cast selects the apply(String) overload for the null argument.
    final StringFormatExtractionFn bracketFn = new StringFormatExtractionFn(BRACKET_FORMAT);
    final String formatted = bracketFn.apply((String) null);
    Assert.assertEquals("[null]", formatted);
  }

  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper mapper = new DefaultObjectMapper();

    // Deserialize through the ExtractionFn base type so that the "type" name is resolved.
    final ExtractionFn parsed = mapper.readValue(
        "{ \"type\" : \"stringFormat\", \"format\" : \"[%s]\" }",
        ExtractionFn.class
    );
    final StringFormatExtractionFn formatFn = (StringFormatExtractionFn) parsed;
    Assert.assertEquals("[%s]", formatFn.getFormat());

    // Round trip: serialize, read back, and compare with the first instance.
    final byte[] serialized = mapper.writeValueAsBytes(formatFn);
    final ExtractionFn roundTripped = mapper.readValue(serialized, ExtractionFn.class);
    Assert.assertEquals(formatFn, roundTripped);
  }
}